Skip to content

Commit cebe5e8

Browse files
committed
Reduced allocations in merge_sort for short vectors
Added a separate in-place insertion sort for short vectors. Increased the threshold for insertion sort from 8 to 32 elements for small types and 16 for larger types. Added benchmarks for sorting larger types.
1 parent ef53b7a commit cebe5e8

File tree

1 file changed

+104
-5
lines changed

1 file changed

+104
-5
lines changed

src/libstd/vec.rs

+104-5
Original file line numberDiff line numberDiff line change
@@ -1812,12 +1812,70 @@ impl<T:Eq> OwnedEqVector<T> for ~[T] {
18121812
}
18131813
}
18141814

1815+
fn insertion_sort<T>(v: &mut [T], compare: |&T, &T| -> Ordering) {
1816+
let len = v.len() as int;
1817+
let buf_v = v.as_mut_ptr();
1818+
1819+
// 1 <= i < len;
1820+
for i in range(1, len) {
1821+
// j satisfies: 0 <= j <= i;
1822+
let mut j = i;
1823+
unsafe {
1824+
// `i` is in bounds.
1825+
let read_ptr = buf_v.offset(i) as *T;
1826+
1827+
// find where to insert, we need to do strict <,
1828+
// rather than <=, to maintain stability.
1829+
1830+
// 0 <= j - 1 < len, so .offset(j - 1) is in bounds.
1831+
while j > 0 &&
1832+
compare(&*read_ptr, &*buf_v.offset(j - 1)) == Less {
1833+
j -= 1;
1834+
}
1835+
1836+
// shift everything to the right, to make space to
1837+
// insert this value.
1838+
1839+
// j + 1 could be `len` (for the last `i`), but in
1840+
// that case, `i == j` so we don't copy. The
1841+
// `.offset(j)` is always in bounds.
1842+
1843+
if i != j {
1844+
let tmp = ptr::read_ptr(read_ptr);
1845+
ptr::copy_memory(buf_v.offset(j + 1),
1846+
buf_v.offset(j),
1847+
(i - j) as uint);
1848+
ptr::copy_nonoverlapping_memory(buf_v.offset(j),
1849+
&tmp as *T,
1850+
1);
1851+
cast::forget(tmp);
1852+
}
1853+
}
1854+
}
1855+
}
1856+
18151857
fn merge_sort<T>(v: &mut [T], compare: |&T, &T| -> Ordering) {
18161858
// warning: this wildly uses unsafe.
1817-
static INSERTION: uint = 8;
1859+
static BASE_INSERTION: uint = 32;
1860+
static LARGE_INSERTION: uint = 16;
1861+
1862+
// FIXME #12092: smaller insertion runs seems to make sorting
1863+
// vectors of large elements a little faster on some platforms,
1864+
// but hasn't been tested/tuned extensively
1865+
let insertion = if size_of::<T>() <= 16 {
1866+
BASE_INSERTION
1867+
} else {
1868+
LARGE_INSERTION
1869+
};
18181870

18191871
let len = v.len();
18201872

1873+
// short vectors get sorted in-place via insertion sort to avoid allocations
1874+
if len <= insertion {
1875+
insertion_sort(v, compare);
1876+
return;
1877+
}
1878+
18211879
// allocate some memory to use as scratch memory, we keep the
18221880
// length 0 so we can keep shallow copies of the contents of `v`
18231881
// without risking the dtors running on an object twice if
@@ -1837,9 +1895,9 @@ fn merge_sort<T>(v: &mut [T], compare: |&T, &T| -> Ordering) {
18371895
// We could hardcode the sorting comparisons here, and we could
18381896
// manipulate/step the pointers themselves, rather than repeatedly
18391897
// .offset-ing.
1840-
for start in range_step(0, len, INSERTION) {
1841-
// start <= i <= len;
1842-
for i in range(start, cmp::min(start + INSERTION, len)) {
1898+
for start in range_step(0, len, insertion) {
1899+
// start <= i < len;
1900+
for i in range(start, cmp::min(start + insertion, len)) {
18431901
// j satisfies: start <= j <= i;
18441902
let mut j = i as int;
18451903
unsafe {
@@ -1871,7 +1929,7 @@ fn merge_sort<T>(v: &mut [T], compare: |&T, &T| -> Ordering) {
18711929
}
18721930

18731931
// step 2. merge the sorted runs.
1874-
let mut width = INSERTION;
1932+
let mut width = insertion;
18751933
while width < len {
18761934
// merge the sorted runs of length `width` in `buf_dat` two at
18771935
// a time, placing the result in `buf_tmp`.
@@ -4505,4 +4563,45 @@ mod bench {
45054563
});
45064564
bh.bytes = (v.len() * mem::size_of_val(&v[0])) as u64;
45074565
}
4566+
4567+
type BigSortable = (u64,u64,u64,u64);
4568+
4569+
#[bench]
4570+
fn sort_big_random_small(bh: &mut BenchHarness) {
4571+
let mut rng = weak_rng();
4572+
bh.iter(|| {
4573+
let mut v: ~[BigSortable] = rng.gen_vec(5);
4574+
v.sort();
4575+
});
4576+
bh.bytes = 5 * mem::size_of::<BigSortable>() as u64;
4577+
}
4578+
4579+
#[bench]
4580+
fn sort_big_random_medium(bh: &mut BenchHarness) {
4581+
let mut rng = weak_rng();
4582+
bh.iter(|| {
4583+
let mut v: ~[BigSortable] = rng.gen_vec(100);
4584+
v.sort();
4585+
});
4586+
bh.bytes = 100 * mem::size_of::<BigSortable>() as u64;
4587+
}
4588+
4589+
#[bench]
4590+
fn sort_big_random_large(bh: &mut BenchHarness) {
4591+
let mut rng = weak_rng();
4592+
bh.iter(|| {
4593+
let mut v: ~[BigSortable] = rng.gen_vec(10000);
4594+
v.sort();
4595+
});
4596+
bh.bytes = 10000 * mem::size_of::<BigSortable>() as u64;
4597+
}
4598+
4599+
#[bench]
4600+
fn sort_big_sorted(bh: &mut BenchHarness) {
4601+
let mut v = vec::from_fn(10000u, |i| (i, i, i, i));
4602+
bh.iter(|| {
4603+
v.sort();
4604+
});
4605+
bh.bytes = (v.len() * mem::size_of_val(&v[0])) as u64;
4606+
}
45084607
}

0 commit comments

Comments
 (0)