From 08504fbb0b05abdd9543f08102b0d6275dde210c Mon Sep 17 00:00:00 2001 From: Shotaro Yamada Date: Fri, 2 Mar 2018 13:50:59 +0900 Subject: [PATCH 1/3] Optimize str::repeat --- src/liballoc/lib.rs | 1 + src/liballoc/str.rs | 37 ++++++++++++++++++++++++++++++++++--- 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/src/liballoc/lib.rs b/src/liballoc/lib.rs index d250cfe1880fc..cb43d5bee78ca 100644 --- a/src/liballoc/lib.rs +++ b/src/liballoc/lib.rs @@ -124,6 +124,7 @@ #![feature(allocator_internals)] #![feature(on_unimplemented)] #![feature(exact_chunks)] +#![feature(pointer_methods)] #![cfg_attr(not(test), feature(fused, fn_traits, placement_new_protocol, swap_with_slice, i128))] #![cfg_attr(test, feature(test, box_heap))] diff --git a/src/liballoc/str.rs b/src/liballoc/str.rs index a00e3d17dd00f..08ba4a180ed53 100644 --- a/src/liballoc/str.rs +++ b/src/liballoc/str.rs @@ -43,6 +43,7 @@ use core::str as core_str; use core::str::pattern::Pattern; use core::str::pattern::{Searcher, ReverseSearcher, DoubleEndedSearcher}; use core::mem; +use core::ptr; use core::iter::FusedIterator; use std_unicode::str::{UnicodeStr, Utf16Encoder}; @@ -2066,9 +2067,39 @@ impl str { /// ``` #[stable(feature = "repeat_str", since = "1.16.0")] pub fn repeat(&self, n: usize) -> String { - let mut s = String::with_capacity(self.len() * n); - s.extend((0..n).map(|_| self)); - s + if n == 0 { + return String::new(); + } + + // n = 2^j + k (2^j > k) + + // 2^j: + let mut s = Vec::with_capacity(self.len() * n); + s.extend(self.as_bytes()); + let mut m = n >> 1; + while m > 0 { + let len = s.len(); + unsafe { + ptr::copy_nonoverlapping(s.as_ptr(), (s.as_mut_ptr() as *mut u8).add(len), len); + s.set_len(len * 2); + } + m >>= 1; + } + + // k: + let res_len = n * self.len(); + if res_len > s.len() { + unsafe { + ptr::copy_nonoverlapping( + s.as_ptr(), + (s.as_mut_ptr() as *mut u8).add(s.len()), + res_len - s.len(), + ); + s.set_len(res_len); + } + } + + unsafe { String::from_utf8_unchecked(s) } } /// Checks if all characters in this string are within the ASCII range. From 683bdc7f0a20236c7dd5a8a731951ef5db14b3be Mon Sep 17 00:00:00 2001 From: Shotaro Yamada Date: Sun, 4 Mar 2018 09:00:09 +0900 Subject: [PATCH 2/3] Add comments --- src/liballoc/str.rs | 60 ++++++++++++++++++++++++++++++--------------- 1 file changed, 40 insertions(+), 20 deletions(-) diff --git a/src/liballoc/str.rs b/src/liballoc/str.rs index 08ba4a180ed53..6d153bf02b3ba 100644 --- a/src/liballoc/str.rs +++ b/src/liballoc/str.rs @@ -2071,35 +2071,55 @@ impl str { return String::new(); } - // n = 2^j + k (2^j > k) - - // 2^j: - let mut s = Vec::with_capacity(self.len() * n); - s.extend(self.as_bytes()); - let mut m = n >> 1; - while m > 0 { - let len = s.len(); - unsafe { - ptr::copy_nonoverlapping(s.as_ptr(), (s.as_mut_ptr() as *mut u8).add(len), len); - s.set_len(len * 2); + // If `n` is larger than zero, it can be split as + // `n = 2^expn + rem (2^expn > rem, expn >= 0, rem >= 0)`. + // `2^expn` is the number represented by the leftmost '1' bit of `n`, + // and `rem` is the remaining part of `n`. + + // Using `Vec` to access `set_len()`. + let mut buf = Vec::with_capacity(self.len() * n); + + // `2^expn` repetition is done by doubling `buf` `expn`-times. + buf.extend(self.as_bytes()); + { + let mut m = n >> 1; + // If `m > 0`, there are remaining bits up to the leftmost '1'. + while m > 0 { + // `buf.extend(buf)`: + unsafe { + ptr::copy_nonoverlapping( + buf.as_ptr(), + (buf.as_mut_ptr() as *mut u8).add(buf.len()), + buf.len(), + ); + // `buf` has capacity of `self.len() * n`. + let buf_len = buf.len(); + buf.set_len(buf_len * 2); + } + + m >>= 1; } - m >>= 1; } - // k: - let res_len = n * self.len(); - if res_len > s.len() { + // `rem` (`= n - 2^expn`) repetition is done by copying + // first `rem` repetitions from `buf` itself. + let rem_len = self.len() * n - buf.len(); // `self.len() * rem` + if rem_len > 0 { + // `buf.extend(buf[0 .. rem_len])`: unsafe { + // This is non-overlapping since `2^expn > rem`. ptr::copy_nonoverlapping( - s.as_ptr(), - (s.as_mut_ptr() as *mut u8).add(s.len()), - res_len - s.len(), + buf.as_ptr(), + (buf.as_mut_ptr() as *mut u8).add(buf.len()), + rem_len, ); - s.set_len(res_len); + // `buf.len() + rem_len` equals to `buf.capacity()` (`self.len() * n`). + let buf_len = buf.len(); + buf.set_len(buf_len + rem_len); } } - unsafe { String::from_utf8_unchecked(s) } + unsafe { String::from_utf8_unchecked(buf) } } /// Checks if all characters in this string are within the ASCII range. From 3d58543d49266a7ec3eb5f5f2ffaf902fce17c53 Mon Sep 17 00:00:00 2001 From: Shotaro Yamada Date: Sun, 4 Mar 2018 09:43:29 +0900 Subject: [PATCH 3/3] Avoid unnecessary calculation --- src/liballoc/str.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/liballoc/str.rs b/src/liballoc/str.rs index 6d153bf02b3ba..64e815b1fbaa5 100644 --- a/src/liballoc/str.rs +++ b/src/liballoc/str.rs @@ -2113,9 +2113,9 @@ impl str { (buf.as_mut_ptr() as *mut u8).add(buf.len()), rem_len, ); - // `buf.len() + rem_len` equals to `buf.capacity()` (`self.len() * n`). - let buf_len = buf.len(); - buf.set_len(buf_len + rem_len); + // `buf.len() + rem_len` equals to `buf.capacity()` (`= self.len() * n`). + let buf_cap = buf.capacity(); + buf.set_len(buf_cap); } }