-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
CLN: Prune unnecessary indexing code #27576
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
ebd9d03
e0e039f
330f8af
ca16db2
9dafe60
f21e7d5
dd5fc2d
59e3935
e165eca
743e7c0
5c4927e
f0559a1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1470,9 +1470,6 @@ def dropna(self, how="any"): | |
return self.copy(codes=new_codes, deep=True) | ||
|
||
def get_value(self, series, key): | ||
# somewhat broken encapsulation | ||
from pandas.core.indexing import maybe_droplevels | ||
|
||
# Label-based | ||
s = com.values_from_object(series) | ||
k = com.values_from_object(key) | ||
|
@@ -2709,7 +2706,7 @@ def _maybe_to_slice(loc): | |
|
||
return _maybe_to_slice(loc) if len(loc) != stop - start else slice(start, stop) | ||
|
||
def get_loc_level(self, key, level=0, drop_level=True): | ||
def get_loc_level(self, key, level=0, drop_level: bool = True): | ||
""" | ||
Get both the location for the requested label(s) and the | ||
resulting sliced index. | ||
|
@@ -2750,7 +2747,8 @@ def get_loc_level(self, key, level=0, drop_level=True): | |
(1, None) | ||
""" | ||
|
||
def maybe_droplevels(indexer, levels, drop_level): | ||
# different name to distinguish from maybe_droplevels | ||
def maybe_mi_droplevels(indexer, levels, drop_level: bool): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In the future, maybe pull this out to a module-level function. |
||
if not drop_level: | ||
return self[indexer] | ||
# kludge around
|
@@ -2780,7 +2778,7 @@ def maybe_droplevels(indexer, levels, drop_level): | |
|
||
result = loc if result is None else result & loc | ||
|
||
return result, maybe_droplevels(result, level, drop_level) | ||
return result, maybe_mi_droplevels(result, level, drop_level) | ||
|
||
level = self._get_level_number(level) | ||
|
||
|
@@ -2793,7 +2791,7 @@ def maybe_droplevels(indexer, levels, drop_level): | |
try: | ||
if key in self.levels[0]: | ||
indexer = self._get_level_indexer(key, level=level) | ||
new_index = maybe_droplevels(indexer, [0], drop_level) | ||
new_index = maybe_mi_droplevels(indexer, [0], drop_level) | ||
return indexer, new_index | ||
except TypeError: | ||
pass | ||
|
@@ -2808,7 +2806,7 @@ def partial_selection(key, indexer=None): | |
ilevels = [ | ||
i for i in range(len(key)) if key[i] != slice(None, None) | ||
] | ||
return indexer, maybe_droplevels(indexer, ilevels, drop_level) | ||
return indexer, maybe_mi_droplevels(indexer, ilevels, drop_level) | ||
|
||
if len(key) == self.nlevels and self.is_unique: | ||
# Complete key in unique index -> standard get_loc | ||
|
@@ -2843,10 +2841,10 @@ def partial_selection(key, indexer=None): | |
if indexer is None: | ||
indexer = slice(None, None) | ||
ilevels = [i for i in range(len(key)) if key[i] != slice(None, None)] | ||
return indexer, maybe_droplevels(indexer, ilevels, drop_level) | ||
return indexer, maybe_mi_droplevels(indexer, ilevels, drop_level) | ||
else: | ||
indexer = self._get_level_indexer(key, level=level) | ||
return indexer, maybe_droplevels(indexer, [level], drop_level) | ||
return indexer, maybe_mi_droplevels(indexer, [level], drop_level) | ||
|
||
def _get_level_indexer(self, key, level=0, indexer=None): | ||
# return an indexer, boolean array or a slice showing where the key is | ||
|
@@ -3464,3 +3462,34 @@ def _sparsify(label_list, start=0, sentinel=""): | |
|
||
def _get_na_rep(dtype): | ||
return {np.datetime64: "NaT", np.timedelta64: "NaT"}.get(dtype, "NaN") | ||
|
||
|
||
def maybe_droplevels(index, key): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you add a doc-string There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sure |
||
""" | ||
Attempt to drop level or levels from the given index. | ||
|
||
Parameters | ||
---------- | ||
index: Index | ||
key : scalar or tuple | ||
|
||
Returns | ||
------- | ||
Index | ||
""" | ||
# drop levels | ||
original_index = index | ||
if isinstance(key, tuple): | ||
for _ in key: | ||
try: | ||
index = index.droplevel(0) | ||
except ValueError: | ||
# we have dropped too much, so back out | ||
return original_index | ||
else: | ||
try: | ||
index = index.droplevel(0) | ||
except ValueError: | ||
pass | ||
|
||
return index |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -226,7 +226,7 @@ def _validate_key(self, key, axis: int): | |
def _has_valid_tuple(self, key: Tuple): | ||
""" check the key for valid keys across my indexer """ | ||
for i, k in enumerate(key): | ||
if i >= self.obj.ndim: | ||
if i >= self.ndim: | ||
raise IndexingError("Too many indexers") | ||
try: | ||
self._validate_key(k, i) | ||
|
@@ -254,7 +254,7 @@ def _convert_tuple(self, key, is_setter: bool = False): | |
keyidx.append(slice(None)) | ||
else: | ||
for i, k in enumerate(key): | ||
if i >= self.obj.ndim: | ||
if i >= self.ndim: | ||
raise IndexingError("Too many indexers") | ||
idx = self._convert_to_indexer(k, axis=i, is_setter=is_setter) | ||
keyidx.append(idx) | ||
|
@@ -286,7 +286,7 @@ def _has_valid_positional_setitem_indexer(self, indexer): | |
raise IndexError("{0} cannot enlarge its target object".format(self.name)) | ||
else: | ||
if not isinstance(indexer, tuple): | ||
indexer = self._tuplify(indexer) | ||
indexer = _tuplify(self.ndim, indexer) | ||
for ax, i in zip(self.obj.axes, indexer): | ||
if isinstance(i, slice): | ||
# should check the stop slice? | ||
|
@@ -401,7 +401,7 @@ def _setitem_with_indexer(self, indexer, value): | |
assert info_axis == 1 | ||
|
||
if not isinstance(indexer, tuple): | ||
indexer = self._tuplify(indexer) | ||
indexer = _tuplify(self.ndim, indexer) | ||
|
||
if isinstance(value, ABCSeries): | ||
value = self._align_series(indexer, value) | ||
|
@@ -670,7 +670,7 @@ def ravel(i): | |
aligners = [not com.is_null_slice(idx) for idx in indexer] | ||
sum_aligners = sum(aligners) | ||
single_aligner = sum_aligners == 1 | ||
is_frame = self.obj.ndim == 2 | ||
is_frame = self.ndim == 2 | ||
obj = self.obj | ||
|
||
# are we a single alignable value on a non-primary | ||
|
@@ -731,7 +731,7 @@ def ravel(i): | |
raise ValueError("Incompatible indexer with Series") | ||
|
||
def _align_frame(self, indexer, df: ABCDataFrame): | ||
is_frame = self.obj.ndim == 2 | ||
is_frame = self.ndim == 2 | ||
|
||
if isinstance(indexer, tuple): | ||
|
||
|
@@ -867,7 +867,7 @@ def _handle_lowerdim_multi_index_axis0(self, tup: Tuple): | |
except KeyError as ek: | ||
# raise KeyError if number of indexers match | ||
# else IndexingError will be raised | ||
if len(tup) <= self.obj.index.nlevels and len(tup) > self.obj.ndim: | ||
if len(tup) <= self.obj.index.nlevels and len(tup) > self.ndim: | ||
raise ek | ||
except Exception as e1: | ||
if isinstance(tup[0], (slice, Index)): | ||
|
@@ -900,7 +900,7 @@ def _getitem_lowerdim(self, tup: Tuple): | |
if result is not None: | ||
return result | ||
|
||
if len(tup) > self.obj.ndim: | ||
if len(tup) > self.ndim: | ||
raise IndexingError("Too many indexers. handle elsewhere") | ||
|
||
# to avoid wasted computation | ||
|
@@ -1274,11 +1274,6 @@ def _convert_to_indexer( | |
return {"key": obj} | ||
raise | ||
|
||
def _tuplify(self, loc): | ||
tup = [slice(None, None) for _ in range(self.ndim)] | ||
tup[0] = loc | ||
return tuple(tup) | ||
|
||
def _get_slice_axis(self, slice_obj: slice, axis: int): | ||
# caller is responsible for ensuring non-None axis | ||
obj = self.obj | ||
|
@@ -2077,6 +2072,8 @@ def _getitem_tuple(self, tup: Tuple): | |
|
||
# if the dim was reduced, then pass a lower-dim the next time | ||
if retval.ndim < self.ndim: | ||
# TODO: this is never reached in tests; can we confirm that | ||
# it is impossible? | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @toobaz any thoughts on showing this is unreachable? If we can confirm this is the case, then this chunk of the code can be shared with the other _getitem_tuple method above (pinging you since you wrote the wiki page on simplifying this code) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. looks like this also wasnt hit in 0.24.2 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. And for context, example case which triggers on that commit but not on master Codeimport numpy as np
from pandas import *
mi_int = DataFrame(
np.random.randn(3, 3),
columns=[[2, 2, 4], [6, 8, 10]],
index=[[4, 4, 8], [8, 10, 12]],
)
# this triggers the code path when testing against 9f0dc3befb
rs = mi_int.iloc[:, 2] There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think that path is ever reached. It should be reached by something like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (but no major objections to removing for now) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @pilkibun there is another un-hit block on lines 440-450. any idea what that was intended to catch? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this was related to Panel indexing, can probably go. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
No objection |
||
axis -= 1 | ||
|
||
# try to get for the next axis | ||
|
@@ -2152,7 +2149,7 @@ def _convert_to_indexer( | |
) | ||
|
||
|
||
class _ScalarAccessIndexer(_NDFrameIndexer): | ||
class _ScalarAccessIndexer(_NDFrameIndexerBase): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The only _NDFrameIndexer method these use is _tuplify, which is why this PR takes that out and makes it a function instead of a method. Much easier to reason about these classes now |
||
""" access scalars quickly """ | ||
|
||
def _convert_key(self, key, is_setter: bool = False): | ||
|
@@ -2178,8 +2175,8 @@ def __setitem__(self, key, value): | |
key = com.apply_if_callable(key, self.obj) | ||
|
||
if not isinstance(key, tuple): | ||
key = self._tuplify(key) | ||
if len(key) != self.obj.ndim: | ||
key = _tuplify(self.ndim, key) | ||
if len(key) != self.ndim: | ||
raise ValueError("Not enough indexers for scalar access (setting)!") | ||
key = list(self._convert_key(key, is_setter=True)) | ||
key.append(value) | ||
|
@@ -2309,9 +2306,6 @@ class _iAtIndexer(_ScalarAccessIndexer): | |
|
||
_takeable = True | ||
|
||
def _has_valid_setitem_indexer(self, indexer): | ||
self._has_valid_positional_setitem_indexer(indexer) | ||
|
||
def _convert_key(self, key, is_setter: bool = False): | ||
""" require integer args (and convert to label arguments) """ | ||
for a, i in zip(self.obj.axes, key): | ||
|
@@ -2320,6 +2314,25 @@ def _convert_key(self, key, is_setter: bool = False): | |
return key | ||
|
||
|
||
def _tuplify(ndim: int, loc) -> tuple: | ||
""" | ||
Given an indexer for the first dimension, create an equivalent tuple | ||
for indexing over all dimensions. | ||
|
||
Parameters | ||
---------- | ||
ndim : int | ||
loc : object | ||
|
||
Returns | ||
------- | ||
tuple | ||
""" | ||
tup = [slice(None, None) for _ in range(ndim)] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you add a doc-string There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sure There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. docstrings added. will take another look at removing the discussed lines in the next pass |
||
tup[0] = loc | ||
return tuple(tup) | ||
|
||
|
||
def convert_to_index_sliceable(obj, key): | ||
""" | ||
if we are index sliceable, then return my slicer, otherwise return None | ||
|
@@ -2469,25 +2482,6 @@ def need_slice(obj): | |
) | ||
|
||
|
||
def maybe_droplevels(index, key): | ||
# drop levels | ||
original_index = index | ||
if isinstance(key, tuple): | ||
for _ in key: | ||
try: | ||
index = index.droplevel(0) | ||
except ValueError: | ||
# we have dropped too much, so back out | ||
return original_index | ||
else: | ||
try: | ||
index = index.droplevel(0) | ||
except ValueError: | ||
pass | ||
|
||
return index | ||
|
||
|
||
def _non_reducing_slice(slice_): | ||
""" | ||
Ensure that a slice doesn't reduce to a Series or Scalar. | ||
|
Uh oh!
There was an error while loading. Please reload this page.