@@ -101,6 +101,7 @@ mod table {
     /// There's currently no "debug-only" asserts in rust, so if you're reading
     /// this and going "what? of course there are debug-only asserts!", then
     /// please make this use them!
+    #[unsafe_no_drop_flag]
     pub struct RawTable<K, V> {
         capacity: uint,
         size:     uint,
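
Note on the new attribute: `#[unsafe_no_drop_flag]` removes the hidden drop flag the compiler would otherwise add to `RawTable`, so its destructor may run over a value whose bytes were zeroed out after a move. That is why the `Drop` impl in the next hunk gains a null check and nulls `self.hashes` after freeing. A minimal sketch of the pattern, using hypothetical `Buffer`/`release` names rather than anything from the patch:

```rust
// Sketch only: with #[unsafe_no_drop_flag] there is no hidden "already
// dropped" flag, so drop glue may see an all-zero value and must treat it
// as "nothing to do". `Buffer` and `release` are illustrative names.
#[unsafe_no_drop_flag]
struct Buffer {
    ptr: *mut u8,
    len: uint,
}

fn release(_ptr: *mut u8, _len: uint) {
    // Stand-in for a real deallocation call.
}

impl Drop for Buffer {
    fn drop(&mut self) {
        // A zeroed-out value shows up here as a null pointer: nothing to free.
        if self.ptr.is_not_null() {
            release(self.ptr, self.len);
            // Null the pointer so a second run of the destructor is a no-op,
            // mirroring `self.hashes = RawPtr::null()` in the next hunk.
            self.ptr = 0 as *mut u8;
        }
    }
}
```
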
@@ -549,38 +550,59 @@ mod table {
 
             assert_eq!(self.size, 0);
 
-            let hashes_size = self.capacity * size_of::<u64>();
-            let keys_size = self.capacity * size_of::<K>();
-            let vals_size = self.capacity * size_of::<V>();
-            let (align, _, _, _, size) = calculate_offsets(hashes_size, min_align_of::<u64>(),
-                                                           keys_size, min_align_of::<K>(),
-                                                           vals_size, min_align_of::<V>());
+            if self.hashes.is_not_null() {
+                let hashes_size = self.capacity * size_of::<u64>();
+                let keys_size = self.capacity * size_of::<K>();
+                let vals_size = self.capacity * size_of::<V>();
+                let (align, _, _, _, size) = calculate_offsets(hashes_size, min_align_of::<u64>(),
+                                                               keys_size, min_align_of::<K>(),
+                                                               vals_size, min_align_of::<V>());
+
+                unsafe {
+                    deallocate(self.hashes as *mut u8, size, align);
+                    // Remember how everything was allocated out of one buffer
+                    // during initialization? We only need one call to free here.
+                }
 
-            unsafe {
-                deallocate(self.hashes as *mut u8, size, align);
-                // Remember how everything was allocated out of one buffer
-                // during initialization? We only need one call to free here.
+                self.hashes = RawPtr::null();
             }
         }
     }
 }
 
-// We use this type for the load factor, to avoid floating point operations
-// which might not be supported efficiently on some hardware.
-//
-// We use small u16s here to save space in the hashtable. They get upcasted
-// to u64s when we actually use them.
-type Fraction = (u16, u16); // (numerator, denominator)
-
-// multiplication by a fraction, in a way that won't generally overflow for
-// array sizes outside a factor of 10 of U64_MAX.
-fn fraction_mul(lhs: uint, (num, den): Fraction) -> uint {
-    (((lhs as u64) * (num as u64)) / (den as u64)) as uint
-}
-
 static INITIAL_LOG2_CAP: uint = 5;
 static INITIAL_CAPACITY: uint = 1 << INITIAL_LOG2_CAP; // 2^5
-static INITIAL_LOAD_FACTOR: Fraction = (9, 10);
+
+/// The default behavior of HashMap implements a load factor of 90.9%.
+/// This behavior is characterized by the following conditions:
+///
+/// - if `size * 1.1 < cap < size * 4` then shouldn't resize
+/// - if `cap < minimum_capacity * 2` then shouldn't shrink
+#[deriving(Clone)]
+struct DefaultResizePolicy {
+    /// Doubled minimal capacity. The capacity must never drop below
+    /// the minimum capacity. (The check happens before the capacity
+    /// is potentially halved.)
+    minimum_capacity2: uint
+}
+
+impl DefaultResizePolicy {
+    fn new(new_capacity: uint) -> DefaultResizePolicy {
+        DefaultResizePolicy {
+            minimum_capacity2: new_capacity << 1
+        }
+    }
+
+    #[inline]
+    fn capacity_range(&self, new_size: uint) -> (uint, uint) {
+        ((new_size * 11) / 10, max(new_size << 3, self.minimum_capacity2))
+    }
+
+    #[inline]
+    fn reserve(&mut self, new_capacity: uint) {
+        self.minimum_capacity2 = new_capacity << 1;
+    }
+}
 
 // The main performance trick in this hashmap is called Robin Hood Hashing.
 // It gains its excellent performance from one key invariant:
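
To see the new policy in concrete numbers, here is a standalone restatement of `capacity_range` with illustrative values (the `main` and its asserts are not part of the patch): for a map holding `size` elements, the table grows once its capacity falls to `size * 11 / 10` or below (roughly a 90.9% load factor) and shrinks only once capacity reaches `max(size * 8, minimum_capacity * 2)`.

```rust
use std::cmp::max;

// Standalone copy of the policy introduced above, so the thresholds can be
// checked in isolation. minimum_capacity2 = 64 corresponds to a minimum
// capacity of 32 buckets; all numbers here are illustrative.
struct DefaultResizePolicy {
    minimum_capacity2: uint
}

impl DefaultResizePolicy {
    fn capacity_range(&self, new_size: uint) -> (uint, uint) {
        ((new_size * 11) / 10, max(new_size << 3, self.minimum_capacity2))
    }
}

fn main() {
    let policy = DefaultResizePolicy { minimum_capacity2: 64 };

    // 60 elements: grow once capacity drops to 66 or below (load factor
    // above ~90.9%), shrink only at capacity 480 or above, so a table of
    // 128 or 256 buckets is left alone.
    assert_eq!(policy.capacity_range(60), (66, 480));

    // 5 elements: the shrink bound is clamped by the doubled minimum
    // capacity, so halving can never take the table below 32 buckets.
    assert_eq!(policy.capacity_range(5), (5, 64));
}
```
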
@@ -593,13 +615,13 @@ static INITIAL_LOAD_FACTOR: Fraction = (9, 10);
 // high load factors with good performance. The 90% load factor I use is rather
 // conservative.
 //
-// > Why a load factor of 90%?
+// > Why a load factor of approximately 90%?
 //
 // In general, all the distances to initial buckets will converge on the mean.
 // At a load factor of α, the odds of finding the target bucket after k
 // probes is approximately 1-α^k. If we set this equal to 50% (since we converge
 // on the mean) and set k=8 (64-byte cache line / 8-byte hash), α=0.92. I round
-// this down to 0.90 to make the math easier on the CPU and avoid its FPU.
+// this down to make the math easier on the CPU and avoid its FPU.
 // Since on average we start the probing in the middle of a cache line, this
 // strategy pulls in two cache lines of hashes on every lookup. I think that's
 // pretty good, but if you want to trade off some space, it could go down to one
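
For reference, the arithmetic behind the figure in the comment above: solving 1 - α^8 = 0.5 gives α = 0.5^(1/8) = 2^(-1/8) ≈ 0.917, i.e. roughly 0.92. The patch rounds this down to the 10/11 ≈ 0.909 load factor implied by `capacity_range` (grow once `cap <= size * 11 / 10`), which keeps the threshold an integer expression and off the FPU.
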
@@ -616,8 +638,6 @@ static INITIAL_LOAD_FACTOR: Fraction = (9, 10);
 // ============================
 //
 // Allow the load factor to be changed dynamically and/or at initialization.
-// I'm having trouble figuring out a sane API for this without exporting my
-// hackish fraction type, while still avoiding floating point.
 //
 // Also, would it be possible for us to reuse storage when growing the
 // underlying table? This is exactly the use case for 'realloc', and may
@@ -715,31 +735,13 @@ pub struct HashMap<K, V, H = sip::SipHasher> {
     // All hashes are keyed on these values, to prevent hash collision attacks.
     hasher: H,
 
-    // When size == grow_at, we double the capacity.
-    grow_at: uint,
-
-    // The capacity must never drop below this.
-    minimum_capacity: uint,
-
     table: table::RawTable<K, V>,
 
-    // We keep this at the end since it's 4-bytes, unlike everything else
-    // in this struct. Might as well save a word of padding!
-    load_factor: Fraction,
-}
-
-/// Get the number of elements which will force the capacity to grow.
-fn grow_at(capacity: uint, load_factor: Fraction) -> uint {
-    fraction_mul(capacity, load_factor)
+    // We keep this at the end since it might as well have tail padding.
+    resize_policy: DefaultResizePolicy,
 }
 
 impl<K: Eq + Hash<S>, V, S, H: Hasher<S>> HashMap<K, V, H> {
-    /// Get the number of elements which will force the capacity to shrink.
-    /// When size == self.shrink_at(), we halve the capacity.
-    fn shrink_at(&self) -> uint {
-        self.table.capacity() >> 2
-    }
-
     // Probe the `idx`th bucket for a given hash, returning the index of the
     // target bucket.
     //
@@ -931,9 +933,12 @@ impl<K: Eq + Hash<S>, V, S, H: Hasher<S>> Container for HashMap<K, V, H> {
 }
 
 impl<K: Eq + Hash<S>, V, S, H: Hasher<S>> Mutable for HashMap<K, V, H> {
-    /// Clear the map, removing all key-value pairs.
+    /// Clear the map, removing all key-value pairs. Keeps the allocated memory
+    /// for reuse.
     fn clear(&mut self) {
-        self.minimum_capacity = self.table.size();
+        // Prevent reallocations from happening from now on. Makes it possible
+        // for the map to be reused but has a downside: reserves permanently.
+        self.resize_policy.reserve(self.table.size());
 
         for i in range(0, self.table.capacity()) {
             match self.table.peek(i) {
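
The reworded `clear` above is aimed at a reuse pattern along these lines (a sketch, with an illustrative element count; assumes `HashMap` is in scope as in the surrounding module):

```rust
// Sketch of reusing a map's allocation across passes.
fn reuse_map_between_passes() {
    let mut m = HashMap::new();

    for i in range(0u, 1000) {
        m.insert(i, i * i);
    }

    // Drop the entries but keep the buckets. clear() also tells the resize
    // policy to treat the old size as a minimum, so later removals cannot
    // shrink the table below it (the "reserves permanently" downside the
    // comment above mentions).
    m.clear();
    assert!(m.is_empty());

    // A second pass of comparable size proceeds without reallocating.
    for i in range(0u, 1000) {
        m.insert(i, i + 1);
    }
}
```
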
@@ -944,7 +949,6 @@ impl<K: Eq + Hash<S>, V, S, H: Hasher<S>> Mutable for HashMap<K, V, H> {
     }
 }
 
-
 impl<K: Eq + Hash<S>, V, S, H: Hasher<S>> Map<K, V> for HashMap<K, V, H> {
     fn find<'a>(&'a self, k: &K) -> Option<&'a V> {
         self.search(k).map(|idx| {
@@ -1057,11 +1061,9 @@ impl<K: Eq + Hash<S>, V, S, H: Hasher<S>> HashMap<K, V, H> {
     pub fn with_capacity_and_hasher(capacity: uint, hasher: H) -> HashMap<K, V, H> {
         let cap = num::next_power_of_two(max(INITIAL_CAPACITY, capacity));
         HashMap {
-            hasher: hasher,
-            load_factor: INITIAL_LOAD_FACTOR,
-            grow_at: grow_at(cap, INITIAL_LOAD_FACTOR),
-            minimum_capacity: cap,
-            table: table::RawTable::new(cap),
+            hasher:        hasher,
+            resize_policy: DefaultResizePolicy::new(cap),
+            table:         table::RawTable::new(cap),
         }
     }
 
@@ -1075,7 +1077,7 @@ impl<K: Eq + Hash<S>, V, S, H: Hasher<S>> HashMap<K, V, H> {
         let cap = num::next_power_of_two(
             max(INITIAL_CAPACITY, new_minimum_capacity));
 
-        self.minimum_capacity = cap;
+        self.resize_policy.reserve(cap);
 
         if self.table.capacity() < cap {
             self.resize(cap);
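
The `next_power_of_two(max(INITIAL_CAPACITY, ...))` rounding visible in the context lines can be checked on its own. A sketch with `INITIAL_CAPACITY` restated locally, assuming the free function the patch calls is reachable as `std::num::next_power_of_two`:

```rust
use std::cmp::max;
use std::num;

static INITIAL_CAPACITY: uint = 32; // 1 << INITIAL_LOG2_CAP, restated here

// The same rounding reserve() applies before comparing against the current
// table capacity.
fn rounded_minimum(new_minimum_capacity: uint) -> uint {
    num::next_power_of_two(max(INITIAL_CAPACITY, new_minimum_capacity))
}

fn main() {
    assert_eq!(rounded_minimum(10), 32);   // never below the initial capacity
    assert_eq!(rounded_minimum(100), 128); // rounded up to a power of two
    assert_eq!(rounded_minimum(128), 128); // powers of two pass through
}
```
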
@@ -1090,8 +1092,6 @@ impl<K: Eq + Hash<S>, V, S, H: Hasher<S>> HashMap<K, V, H> {
         assert!(self.table.size() <= new_capacity);
         assert!(num::is_power_of_two(new_capacity));
 
-        self.grow_at = grow_at(new_capacity, self.load_factor);
-
         let old_table = replace(&mut self.table, table::RawTable::new(new_capacity));
         let old_size = old_table.size();
 
@@ -1105,19 +1105,18 @@ impl<K: Eq + Hash<S>, V, S, H: Hasher<S>> HashMap<K, V, H> {
     /// Performs any necessary resize operations, such that there's space for
     /// new_size elements.
     fn make_some_room(&mut self, new_size: uint) {
-        let should_shrink = new_size <= self.shrink_at();
-        let should_grow = self.grow_at <= new_size;
+        let (grow_at, shrink_at) = self.resize_policy.capacity_range(new_size);
+        let cap = self.table.capacity();
 
-        if should_grow {
-            let new_capacity = self.table.capacity() << 1;
-            self.resize(new_capacity);
-        } else if should_shrink {
-            let new_capacity = self.table.capacity() >> 1;
+        // An invalid value shouldn't make us run out of space.
+        debug_assert!(grow_at >= new_size);
 
-            // Never shrink below the minimum capacity
-            if self.minimum_capacity <= new_capacity {
-                self.resize(new_capacity);
-            }
+        if cap <= grow_at {
+            let new_capacity = cap << 1;
+            self.resize(new_capacity);
+        } else if shrink_at <= cap {
+            let new_capacity = cap >> 1;
+            self.resize(new_capacity);
         }
     }
 
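
Tying this back to the earlier `capacity_range` sketch: with `new_size = 60` and a doubled minimum capacity of 64, the thresholds are `(grow_at, shrink_at) = (66, 480)`, so a table at capacity 64 satisfies `cap <= grow_at` and doubles to 128, a table at capacity 512 satisfies `shrink_at <= cap` and halves to 256, and anything in between is left untouched.
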
@@ -2025,8 +2024,8 @@ mod test_map {
         assert!(m.is_empty());
 
         let mut i = 0u;
-        let old_resize_at = m.grow_at;
-        while old_resize_at == m.grow_at {
+        let old_cap = m.table.capacity();
+        while old_cap == m.table.capacity() {
             m.insert(i, i);
             i += 1;
         }
@@ -2035,6 +2034,52 @@ mod test_map {
         assert!(!m.is_empty());
     }
 
+    #[test]
+    fn test_resize_policy() {
+        let mut m = HashMap::new();
+
+        assert_eq!(m.len(), 0);
+        assert!(m.is_empty());
+
+        let initial_cap = m.table.capacity();
+        m.reserve(initial_cap * 2);
+        let cap = m.table.capacity();
+
+        assert_eq!(cap, initial_cap * 2);
+
+        let mut i = 0u;
+        for _ in range(0, cap * 3 / 4) {
+            m.insert(i, i);
+            i += 1;
+        }
+
+        assert_eq!(m.len(), i);
+        assert_eq!(m.table.capacity(), cap);
+
+        for _ in range(0, cap / 4) {
+            m.insert(i, i);
+            i += 1;
+        }
+
+        let new_cap = m.table.capacity();
+        assert_eq!(new_cap, cap * 2);
+
+        for _ in range(0, cap / 2) {
+            i -= 1;
+            m.remove(&i);
+            assert_eq!(m.table.capacity(), new_cap);
+        }
+
+        for _ in range(0, cap / 2 - 1) {
+            i -= 1;
+            m.remove(&i);
+        }
+
+        assert_eq!(m.table.capacity(), cap);
+        assert_eq!(m.len(), i);
+        assert!(!m.is_empty());
+    }
+
     #[test]
     fn test_find_equiv() {
         let mut m = HashMap::new();