diff --git a/src/libcore/str/pattern.rs b/src/libcore/str/pattern.rs index 677c0ecc33d7f..089d691773a1b 100644 --- a/src/libcore/str/pattern.rs +++ b/src/libcore/str/pattern.rs @@ -284,7 +284,7 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> { #[inline] fn next(&mut self) -> SearchStep { let old_finger = self.finger; - let slice = unsafe { self.haystack.get_unchecked(old_finger..self.haystack.len()) }; + let slice = unsafe { self.haystack.get_unchecked(old_finger..self.finger_back) }; let mut iter = slice.chars(); let old_len = iter.iter.len(); if let Some(ch) = iter.next() { @@ -304,7 +304,8 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> { fn next_match(&mut self) -> Option<(usize, usize)> { loop { // get the haystack after the last character found - let bytes = if let Some(slice) = self.haystack.as_bytes().get(self.finger..) { + let bytes = if let Some(slice) = self.haystack.as_bytes() + .get(self.finger..self.finger_back) { slice } else { return None; @@ -340,7 +341,7 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> { } } else { // found nothing, exit - self.finger = self.haystack.len(); + self.finger = self.finger_back; return None; } } @@ -353,7 +354,7 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> { #[inline] fn next_back(&mut self) -> SearchStep { let old_finger = self.finger_back; - let slice = unsafe { self.haystack.slice_unchecked(0, old_finger) }; + let slice = unsafe { self.haystack.slice_unchecked(self.finger, old_finger) }; let mut iter = slice.chars(); let old_len = iter.iter.len(); if let Some(ch) = iter.next_back() { @@ -374,7 +375,7 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> { let haystack = self.haystack.as_bytes(); loop { // get the haystack up to but not including the last character searched - let bytes = if let Some(slice) = haystack.get(..self.finger_back) { + let bytes = if let Some(slice) = haystack.get(self.finger..self.finger_back) { slice } else { return None; @@ -382,6 +383,9 @@ unsafe 
impl<'a> ReverseSearcher<'a> for CharSearcher<'a> { // the last byte of the utf8 encoded needle let last_byte = unsafe { *self.utf8_encoded.get_unchecked(self.utf8_size - 1) }; if let Some(index) = memchr::memrchr(last_byte, bytes) { + // we searched a slice that was offset by self.finger, + // add self.finger to recoup the original index + let index = self.finger + index; // memrchr will return the index of the byte we wish to // find. In case of an ASCII character, this is indeed // were we wish our new finger to be ("after" the found @@ -412,7 +416,7 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> { // found the last byte when searching in reverse. self.finger_back = index; } else { - self.finger_back = 0; + self.finger_back = self.finger; // found nothing, exit return None; } diff --git a/src/libcore/tests/pattern.rs b/src/libcore/tests/pattern.rs index d0fd15263b219..cfa3b7ee6640f 100644 --- a/src/libcore/tests/pattern.rs +++ b/src/libcore/tests/pattern.rs @@ -262,3 +262,41 @@ fn test_reverse_search_shared_bytes() { [InRange(37, 40), Rejects(34, 37), InRange(10, 13), Rejects(8, 10), Done] ); } + +#[test] +fn double_ended_regression_test() { + // https://github.com/rust-lang/rust/issues/47175 + // Ensures that double ended searching comes to a convergence + search_asserts!("abcdeabcdeabcde", 'a', "alternating double ended search", + [next_match, next_match_back, next_match, next_match_back], + [InRange(0, 1), InRange(10, 11), InRange(5, 6), Done] + ); + search_asserts!("abcdeabcdeabcde", 'a', "triple double ended search for a", + [next_match, next_match_back, next_match_back, next_match_back], + [InRange(0, 1), InRange(10, 11), InRange(5, 6), Done] + ); + search_asserts!("abcdeabcdeabcde", 'd', "triple double ended search for d", + [next_match, next_match_back, next_match_back, next_match_back], + [InRange(3, 4), InRange(13, 14), InRange(8, 9), Done] + ); + search_asserts!(STRESS, 'Á', "Double ended search for two-byte Latin character", + 
[next_match, next_match_back, next_match, next_match_back], + [InRange(0, 2), InRange(32, 34), InRange(8, 10), Done] + ); + search_asserts!(STRESS, '각', "Reverse double ended search for three-byte Hangul character", + [next_match_back, next_back, next_match, next, next_match_back, next_match], + [InRange(34, 37), Rejects(32, 34), InRange(19, 22), Rejects(22, 25), InRange(28, 31), Done] + ); + search_asserts!(STRESS, 'ก', "Double ended search for three-byte Thai character", + [next_match, next_back, next, next_match_back, next_match], + [InRange(22, 25), Rejects(47, 48), Rejects(25, 28), InRange(40, 43), Done] + ); + search_asserts!(STRESS, '😁', "Double ended search for four-byte emoji", + [next_match_back, next, next_match, next_back, next_match], + [InRange(43, 47), Rejects(0, 2), InRange(15, 19), Rejects(40, 43), Done] + ); + search_asserts!(STRESS, 'ꁁ', "Double ended search for three-byte Yi character with repeated bytes", + [next_match, next, next_match_back, next_back, next_match], + [InRange(10, 13), Rejects(13, 14), InRange(37, 40), Rejects(34, 37), Done] + ); +}