Skip to content

TST: add tests for take() on empty arrays #20582

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
18 changes: 18 additions & 0 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -458,11 +458,23 @@ def take(self, indexer, allow_fill=True, fill_value=None):
Fill value to replace -1 values with. If applicable, this should
use the sentinel missing value for this type.

Returns
-------
ExtensionArray

Raises
------
IndexError
When the indexer is out of bounds for the array.

Notes
-----
This should follow pandas' semantics where -1 indicates missing values.
Positions where indexer is ``-1`` should be filled with the missing
value for this type.
This gives rise to the special case of a take on an empty
ExtensionArray that does not raises an IndexError straight away
when the `indexer` is all ``-1``.

This is called by ``Series.__getitem__``, ``.loc``, ``iloc``, when the
indexer is a sequence of values.
Expand All @@ -477,6 +489,12 @@ def take(self, indexer, allow_fill=True, fill_value=None):
def take(self, indexer, allow_fill=True, fill_value=None):
indexer = np.asarray(indexer)
mask = indexer == -1

# take on empty array not handled as desired by numpy
# in case of -1 (all missing take)
if not len(self) and mask.all():
return type(self)([np.nan] * len(indexer))

result = self.data.take(indexer)
result[mask] = np.nan # NA for this type
return type(self)(result)
Expand Down
47 changes: 47 additions & 0 deletions pandas/tests/extension/base/getitem.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import pytest
import numpy as np

import pandas as pd
import pandas.util.testing as tm

from .base import BaseExtensionTests

Expand Down Expand Up @@ -120,3 +122,48 @@ def test_take_sequence(self, data):
assert result.iloc[0] == data[0]
assert result.iloc[1] == data[1]
assert result.iloc[2] == data[3]

def test_take(self, data, na_value, na_cmp):
result = data.take([0, -1])
assert result.dtype == data.dtype
assert result[0] == data[0]
na_cmp(result[1], na_value)

with tm.assert_raises_regex(IndexError, "out of bounds"):
data.take([len(data) + 1])

def test_take_empty(self, data, na_value, na_cmp):
empty = data[:0]
result = empty.take([-1])
na_cmp(result[0], na_value)

with tm.assert_raises_regex(IndexError, "cannot do a non-empty take"):
empty.take([0, 1])
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@TomAugspurger I didn't find any existing tests for take, so not sure the indexing tests is the best place (maybe rather the BaseMethodsTests).
And, I could also add tests for the actual use cases where you get this (eg reindex on an empty series)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems like the right place for testing a behavior specific to take.


@pytest.mark.xfail(reason="Series.take with extension array buggy for -1")
def test_take_series(self, data):
s = pd.Series(data)
result = s.take([0, -1])
expected = pd.Series(
data._constructor_from_sequence([data[0], data[len(data) - 1]]),
index=[0, len(data) - 1])
self.assert_series_equal(result, expected)

def test_reindex(self, data, na_value):
s = pd.Series(data)
result = s.reindex([0, 1, 3])
expected = pd.Series(data.take([0, 1, 3]), index=[0, 1, 3])
self.assert_series_equal(result, expected)

n = len(data)
result = s.reindex([-1, 0, n])
expected = pd.Series(
data._constructor_from_sequence([na_value, data[0], na_value]),
index=[-1, 0, n])
self.assert_series_equal(result, expected)

result = s.reindex([n, n + 1])
expected = pd.Series(
data._constructor_from_sequence([na_value, na_value]),
index=[n, n + 1])
self.assert_series_equal(result, expected)
13 changes: 13 additions & 0 deletions pandas/tests/extension/category/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,19 @@ def test_getitem_scalar(self):
# to break things by changing.
pass

@pytest.mark.xfail(reason="Categorical.take buggy")
def test_take(self):
# TODO remove this once Categorical.take is fixed
pass

@pytest.mark.xfail(reason="Categorical.take buggy")
def test_take_empty(self):
pass

@pytest.mark.xfail(reason="test not written correctly for categorical")
def test_reindex(self):
pass


class TestSetitem(base.BaseSetitemTests):
pass
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/extension/decimal/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,10 @@ def take(self, indexer, allow_fill=True, fill_value=None):
indexer = np.asarray(indexer)
mask = indexer == -1

# take on empty array not handled as desired by numpy in case of -1
if not len(self) and mask.all():
return type(self)([self._na_value] * len(indexer))

indexer = _ensure_platform_int(indexer)
out = self.values.take(indexer)
out[mask] = self._na_value
Expand Down
8 changes: 6 additions & 2 deletions pandas/tests/extension/json/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,12 @@ def isna(self):
return np.array([x == self._na_value for x in self.data])

def take(self, indexer, allow_fill=True, fill_value=None):
output = [self.data[loc] if loc != -1 else self._na_value
for loc in indexer]
try:
output = [self.data[loc] if loc != -1 else self._na_value
for loc in indexer]
except IndexError:
raise IndexError("Index is out of bounds or cannot do a "
"non-empty take from an empty array.")
return self._constructor_from_sequence(output)

def copy(self, deep=False):
Expand Down