-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
ENH: Add sort parameter to set operations for some Indexes and adjust… #24521
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
6afbefe
1071f6b
fcdd676
97b0895
2edebf1
67a4e58
7b15248
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -200,7 +200,21 @@ def item_from_zerodim(val: object) -> object: | |
|
||
@cython.wraparound(False) | ||
@cython.boundscheck(False) | ||
def fast_unique_multiple(list arrays): | ||
def fast_unique_multiple(list arrays, sort: bool=True): | ||
""" | ||
Generate a list of unique values from a list of arrays. | ||
|
||
Parameters | ||
---------- | ||
list : array-like | ||
A list of array-like objects | ||
sort : boolean | ||
Whether or not to sort the resulting unique list | ||
|
||
Returns | ||
------- | ||
unique_list : list of unique values | ||
""" | ||
cdef: | ||
ndarray[object] buf | ||
Py_ssize_t k = len(arrays) | ||
|
@@ -217,10 +231,11 @@ def fast_unique_multiple(list arrays): | |
if val not in table: | ||
table[val] = stub | ||
uniques.append(val) | ||
try: | ||
uniques.sort() | ||
except Exception: | ||
pass | ||
if sort: | ||
try: | ||
uniques.sort() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. might see if using safe_sort makes sense here (you would have to import here, otherwise this would e circular) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I tried using There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok, let's followup up later then (new PR). |
||
except Exception: | ||
pass | ||
|
||
return uniques | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2241,13 +2241,17 @@ def _get_reconciled_name_object(self, other): | |
return self._shallow_copy(name=name) | ||
return self | ||
|
||
def union(self, other): | ||
def union(self, other, sort=True): | ||
""" | ||
Form the union of two Index objects and sorts if possible. | ||
Form the union of two Index objects. | ||
|
||
Parameters | ||
---------- | ||
other : Index or array-like | ||
sort : bool, default True | ||
Sort the resulting index if possible | ||
|
||
.. versionadded:: 0.24.0 | ||
|
||
Returns | ||
------- | ||
|
@@ -2277,7 +2281,7 @@ def union(self, other): | |
if not is_dtype_union_equal(self.dtype, other.dtype): | ||
this = self.astype('O') | ||
other = other.astype('O') | ||
return this.union(other) | ||
return this.union(other, sort=sort) | ||
|
||
# TODO(EA): setops-refactor, clean all this up | ||
if is_period_dtype(self) or is_datetime64tz_dtype(self): | ||
|
@@ -2311,29 +2315,33 @@ def union(self, other): | |
else: | ||
result = lvals | ||
|
||
try: | ||
result = sorting.safe_sort(result) | ||
except TypeError as e: | ||
warnings.warn("%s, sort order is undefined for " | ||
"incomparable objects" % e, RuntimeWarning, | ||
stacklevel=3) | ||
if sort: | ||
try: | ||
result = sorting.safe_sort(result) | ||
except TypeError as e: | ||
warnings.warn("{}, sort order is undefined for " | ||
"incomparable objects".format(e), | ||
RuntimeWarning, stacklevel=3) | ||
|
||
# for subclasses | ||
return self._wrap_setop_result(other, result) | ||
|
||
def _wrap_setop_result(self, other, result): | ||
return self._constructor(result, name=get_op_result_name(self, other)) | ||
|
||
def intersection(self, other): | ||
def intersection(self, other, sort=True): | ||
""" | ||
Form the intersection of two Index objects. | ||
|
||
This returns a new Index with elements common to the index and `other`, | ||
preserving the order of the calling index. | ||
This returns a new Index with elements common to the index and `other`. | ||
|
||
Parameters | ||
---------- | ||
other : Index or array-like | ||
sort : bool, default True | ||
Sort the resulting index if possible | ||
|
||
.. versionadded:: 0.24.0 | ||
|
||
Returns | ||
------- | ||
|
@@ -2356,7 +2364,7 @@ def intersection(self, other): | |
if not is_dtype_equal(self.dtype, other.dtype): | ||
this = self.astype('O') | ||
other = other.astype('O') | ||
return this.intersection(other) | ||
return this.intersection(other, sort=sort) | ||
|
||
# TODO(EA): setops-refactor, clean all this up | ||
if is_period_dtype(self): | ||
|
@@ -2385,8 +2393,18 @@ def intersection(self, other): | |
indexer = indexer[indexer != -1] | ||
|
||
taken = other.take(indexer) | ||
|
||
if sort: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok, I still think we can use |
||
taken = sorting.safe_sort(taken.values) | ||
if self.name != other.name: | ||
name = None | ||
else: | ||
name = self.name | ||
return self._shallow_copy(taken, name=name) | ||
|
||
if self.name != other.name: | ||
taken.name = None | ||
|
||
return taken | ||
|
||
def difference(self, other, sort=True): | ||
|
@@ -2442,16 +2460,18 @@ def difference(self, other, sort=True): | |
|
||
return this._shallow_copy(the_diff, name=result_name, freq=None) | ||
|
||
def symmetric_difference(self, other, result_name=None): | ||
def symmetric_difference(self, other, result_name=None, sort=True): | ||
""" | ||
Compute the symmetric difference of two Index objects. | ||
|
||
It's sorted if sorting is possible. | ||
|
||
Parameters | ||
---------- | ||
other : Index or array-like | ||
result_name : str | ||
sort : bool, default True | ||
Sort the resulting index if possible | ||
|
||
.. versionadded:: 0.24.0 | ||
|
||
Returns | ||
------- | ||
|
@@ -2496,10 +2516,11 @@ def symmetric_difference(self, other, result_name=None): | |
right_diff = other.values.take(right_indexer) | ||
|
||
the_diff = _concat._concat_compat([left_diff, right_diff]) | ||
try: | ||
the_diff = sorting.safe_sort(the_diff) | ||
except TypeError: | ||
pass | ||
if sort: | ||
try: | ||
the_diff = sorting.safe_sort(the_diff) | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
except TypeError: | ||
pass | ||
|
||
attribs = self._get_attributes_dict() | ||
attribs['name'] = result_name | ||
|
@@ -3226,8 +3247,12 @@ def join(self, other, how='left', level=None, return_indexers=False, | |
elif how == 'right': | ||
join_index = other | ||
elif how == 'inner': | ||
join_index = self.intersection(other) | ||
# TODO: sort=False here for backwards compat. It may | ||
# be better to use the sort parameter passed into join | ||
join_index = self.intersection(other, sort=False) | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
elif how == 'outer': | ||
# TODO: sort=True here for backwards compat. It may | ||
# be better to use the sort parameter passed into join | ||
join_index = self.union(other) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. On Master There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add a TODO |
||
|
||
if sort: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -594,7 +594,7 @@ def _wrap_setop_result(self, other, result): | |
name = get_op_result_name(self, other) | ||
return self._shallow_copy(result, name=name, freq=None, tz=self.tz) | ||
|
||
def intersection(self, other): | ||
def intersection(self, other, sort=True): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The |
||
""" | ||
Specialized intersection for DatetimeIndex objects. May be much faster | ||
than Index.intersection | ||
|
@@ -617,7 +617,7 @@ def intersection(self, other): | |
other = DatetimeIndex(other) | ||
except (TypeError, ValueError): | ||
pass | ||
result = Index.intersection(self, other) | ||
result = Index.intersection(self, other, sort=sort) | ||
if isinstance(result, DatetimeIndex): | ||
if result.freq is None: | ||
result.freq = to_offset(result.inferred_freq) | ||
|
@@ -627,7 +627,7 @@ def intersection(self, other): | |
other.freq != self.freq or | ||
not other.freq.isAnchored() or | ||
(not self.is_monotonic or not other.is_monotonic)): | ||
result = Index.intersection(self, other) | ||
result = Index.intersection(self, other, sort=sort) | ||
# Invalidate the freq of `result`, which may not be correct at | ||
# this point, depending on the values. | ||
result.freq = None | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4473,7 +4473,7 @@ def _reindex_axis(obj, axis, labels, other=None): | |
|
||
labels = ensure_index(labels.unique()) | ||
if other is not None: | ||
labels = ensure_index(other.unique()) & labels | ||
labels = ensure_index(other.unique()).intersection(labels, sort=False) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Again, on Master |
||
if not labels.equals(ax): | ||
slicer = [slice(None, None)] * obj.ndim | ||
slicer[axis] = labels | ||
|
Uh oh!
There was an error while loading. Please reload this page.