diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py
index 077bfb6a88f0c..18327be5f1c5a 100644
--- a/pandas/core/indexes/interval.py
+++ b/pandas/core/indexes/interval.py
@@ -767,8 +767,13 @@ def get_loc(self, key, method=None):
                 key = Interval(left, right, key.closed)
             else:
                 key = self._maybe_cast_slice_bound(key, 'left', None)
-
-            start, stop = self._find_non_overlapping_monotonic_bounds(key)
+            try:
+                start, stop = self._find_non_overlapping_monotonic_bounds(key)
+            except TypeError:
+                # get_loc should raise KeyError
+                # TODO(py3): use raise from.
+                raise KeyError('Key {!r} is hashable but of incorrect type.'
+                               .format(key))
 
             if start is None or stop is None:
                 return slice(start, stop)
@@ -786,7 +791,12 @@ def get_loc(self, key, method=None):
                 left, right = _get_interval_closed_bounds(key)
                 return self._engine.get_loc_interval(left, right)
             else:
-                return self._engine.get_loc(key)
+                try:
+                    return self._engine.get_loc(key)
+                except TypeError:
+                    msg = ('Key {!r} not found (does not match index type {}).'
+                           .format(key, self.dtype))
+                    raise KeyError(msg)
 
     def get_value(self, series, key):
         if com.is_bool_indexer(key):
@@ -800,7 +810,7 @@ def get_value(self, series, key):
 
         try:
             loc = self.get_loc(key)
-        except TypeError:
+        except KeyError:
             # we didn't find exact intervals or are non-unique
             msg = "unable to slice with this key: {key}".format(key=key)
             raise ValueError(msg)
@@ -820,11 +830,21 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
             return np.arange(len(self), dtype='intp')
 
         if self.is_non_overlapping_monotonic:
-            start, stop = self._find_non_overlapping_monotonic_bounds(target)
-
-            start_plus_one = start + 1
-            if not ((start_plus_one < stop).any()):
-                return np.where(start_plus_one == stop, start, -1)
+            try:
+                start, stop = (
+                    self._find_non_overlapping_monotonic_bounds(target)
+                )
+                start_plus_one = start + 1
+                if not ((start_plus_one < stop).any()):
+                    return np.where(start_plus_one == stop, start, -1)
+            except TypeError as err:
+                # Only raise a type error when the types are not
+                # orderable, such as when the caller is combining
+                # an interval index with an integer index.
+                # (see test_append_different_columns_types_raises
+                # in pandas/tests/reshape/test_concat.py for more examples).
+                if err.args and 'unorderable types:' in err.args[0]:
+                    raise
 
         if not self.is_unique:
             raise ValueError("cannot handle non-unique indices")
@@ -835,7 +855,13 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
 
         # non IntervalIndex
         else:
-            indexer = np.concatenate([self.get_loc(i) for i in target])
+            vals = []
+            for i in target:
+                try:
+                    vals.append(self.get_loc(i))
+                except KeyError:
+                    vals.append(-1)
+            indexer = np.array(vals).flatten()
 
         return ensure_platform_int(indexer)
 
diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py
index ba451da10573a..5d96ed1d901b2 100644
--- a/pandas/tests/indexes/interval/test_interval.py
+++ b/pandas/tests/indexes/interval/test_interval.py
@@ -429,6 +429,15 @@ def test_get_loc_value(self):
         with pytest.raises(KeyError, match=r"^1\.5$"):
             idx.get_loc(1.5)
 
+        # GH25087, test get_loc returns key error for interval indexes
+        key = 'a'
+        msg = 'Key {!r} is hashable but of incorrect type'.format(key)
+        with pytest.raises(KeyError, match=msg):
+            idx.get_loc(key)
+        idx = pd.interval_range(0, 1.0)
+        with pytest.raises(KeyError, match=msg):
+            idx.get_loc('a')
+
     # To be removed, replaced by test_interval_new.py (see #16316, #16386)
     def slice_locs_cases(self, breaks):
         # TODO: same tests for more index types
@@ -581,6 +590,14 @@ def test_get_indexer(self):
         expected = np.array([-1, 1], dtype='intp')
         tm.assert_numpy_array_equal(actual, expected)
 
+        actual = self.index.get_indexer(['a', 1])
+        expected = np.array([-1, 0], dtype='intp')
+        tm.assert_numpy_array_equal(actual, expected)
+
+        actual = self.index.get_indexer(['a', 1, 'b'])
+        expected = np.array([-1, 0, -1], dtype='intp')
+        tm.assert_numpy_array_equal(actual, expected)
+
     # To be removed, replaced by test_interval_new.py (see #16316, #16386)
     def test_get_indexer_subintervals(self):
 
@@ -615,6 +632,24 @@ def test_get_indexer_length_one(self, item, closed):
         expected = np.array([0] * len(item), dtype='intp')
         tm.assert_numpy_array_equal(result, expected)
 
+    @pytest.mark.parametrize('index,value,expected_index', [
+        (pd.interval_range(0, 1), 0.5, 0),
+        (pd.interval_range(0, 3), 0.5, 0),
+        (pd.IntervalIndex.from_tuples([(1, 3), (2, 4), (0, 2)]), 0.5, 2)
+    ])
+    def test_get_indexer_errors(self, index, value, expected_index):
+        actual = index.get_indexer(['a'])
+        expected = np.array([-1], dtype='intp')
+        tm.assert_numpy_array_equal(actual, expected)
+
+        actual = index.get_indexer(['a', 'b'])
+        expected = np.array([-1, -1], dtype='intp')
+        tm.assert_numpy_array_equal(actual, expected)
+
+        actual = index.get_indexer(['a', value, 'b'])
+        expected = np.array([-1, expected_index, -1], dtype='intp')
+        tm.assert_numpy_array_equal(actual, expected)
+
     # Make consistent with test_interval_new.py (see #16316, #16386)
     @pytest.mark.parametrize('arrays', [
         (date_range('20180101', periods=4), date_range('20180103', periods=4)),
diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py
index bb065e7da53f6..2362ddf5db4c3 100644
--- a/pandas/tests/indexing/test_categorical.py
+++ b/pandas/tests/indexing/test_categorical.py
@@ -78,6 +78,12 @@ def test_getitem_scalar(self):
         result = s[cats[0]]
         assert result == expected
 
+    def test_contains_interval_range(self):
+        """Check we can use contains """
+        intervals = pd.interval_range(0.0, 1.0)
+        cats = pd.Categorical(intervals)
+        assert 'gg' not in cats
+
     def test_slicing_directly(self):
         cat = Categorical(["a", "b", "c", "d", "a", "b", "c"])
         sliced = cat[3]