diff --git a/Cargo.lock b/Cargo.lock index c2c06a2adb14e..f52ae7340aad7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3913,6 +3913,7 @@ dependencies = [ name = "rustc_macros" version = "0.1.0" dependencies = [ + "phf_codegen", "proc-macro2", "quote", "syn", @@ -4200,6 +4201,7 @@ version = "0.0.0" dependencies = [ "cfg-if 0.1.10", "md-5", + "phf", "rustc_arena", "rustc_data_structures", "rustc_index", diff --git a/compiler/rustc_macros/Cargo.toml b/compiler/rustc_macros/Cargo.toml index 73eb0dd56d772..b123af8c172e2 100644 --- a/compiler/rustc_macros/Cargo.toml +++ b/compiler/rustc_macros/Cargo.toml @@ -12,3 +12,4 @@ synstructure = "0.12.1" syn = { version = "1", features = ["full"] } proc-macro2 = "1" quote = "1" +phf_codegen = "0.8" diff --git a/compiler/rustc_macros/src/symbols.rs b/compiler/rustc_macros/src/symbols.rs index 5b932864dff5d..4a12a3312b99e 100644 --- a/compiler/rustc_macros/src/symbols.rs +++ b/compiler/rustc_macros/src/symbols.rs @@ -124,10 +124,6 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec) { } }; - let mut keyword_stream = quote! {}; - let mut symbols_stream = quote! {}; - let mut prefill_stream = quote! {}; - let mut counter = 0u32; let mut keys = HashMap::::with_capacity(input.keywords.len() + input.symbols.len() + 10); let mut prev_key: Option<(Span, String)> = None; @@ -136,8 +132,10 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec) { if let Some(prev_span) = keys.get(str) { errors.error(span, format!("Symbol `{}` is duplicated", str)); errors.error(*prev_span, format!("location of previous definition")); + Err(()) } else { keys.insert(str.to_string(), span); + Ok(()) } }; @@ -151,51 +149,75 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec) { prev_key = Some((span, str.to_string())); }; + let mut symbol_strings: Vec = Vec::new(); + // Generate the listed keywords. + let mut keyword_stream = quote! {}; for keyword in input.keywords.iter() { let name = &keyword.name; let value = &keyword.value; let value_string = value.value(); - check_dup(keyword.name.span(), &value_string, &mut errors); - prefill_stream.extend(quote! { - #value, - }); + let symbol_index = symbol_strings.len() as u32; + if check_dup(keyword.name.span(), &value_string, &mut errors).is_ok() { + // Only add an entry to `symbol_strings` if it is not a duplicate. + // If it is a duplicate, then compilation will fail. However, we still + // want to avoid panicking, if a duplicate is detected. + symbol_strings.push(value_string); + } keyword_stream.extend(quote! { - pub const #name: Symbol = Symbol::new(#counter); + pub const #name: Symbol = Symbol::new(#symbol_index); }); - counter += 1; + } + + // Generate symbols for the strings "0", "1", ..., "9". + let digits_base = symbol_strings.len() as u32; + for n in 0..10 { + let n_string = n.to_string(); + if check_dup(Span::call_site(), &n_string, &mut errors).is_ok() { + symbol_strings.push(n_string); + } } // Generate the listed symbols. + let mut symbols_stream = quote! {}; for symbol in input.symbols.iter() { let name = &symbol.name; + let name_string = symbol.name.to_string(); + check_order(symbol.name.span(), &name_string, &mut errors); let value = match &symbol.value { Some(value) => value.value(), - None => name.to_string(), + None => name_string, }; - check_dup(symbol.name.span(), &value, &mut errors); - check_order(symbol.name.span(), &name.to_string(), &mut errors); - prefill_stream.extend(quote! { - #value, - }); + let symbol_index = symbol_strings.len() as u32; + if check_dup(symbol.name.span(), &value, &mut errors).is_ok() { + // Only add an entry to `symbol_strings` if it is not a duplicate. + // If it is a duplicate, then compilation will fail. However, we still + // want to avoid panicking, if a duplicate is detected. + symbol_strings.push(value); + } + symbols_stream.extend(quote! { - pub const #name: Symbol = Symbol::new(#counter); + pub const #name: Symbol = Symbol::new(#symbol_index); }); - counter += 1; } - // Generate symbols for the strings "0", "1", ..., "9". - let digits_base = counter; - counter += 10; - for n in 0..10 { - let n = n.to_string(); - check_dup(Span::call_site(), &n, &mut errors); - prefill_stream.extend(quote! { - #n, - }); + // We have finished collecting symbol strings. + let static_symbols_len = symbol_strings.len(); + let dynamic_symbol_base = symbol_strings.len() as u32; + let symbol_strings = symbol_strings; // no more mutation + + // Build the body of STATIC_SYMBOLS. + let symbol_strings_tokens: TokenStream = symbol_strings.iter().map(|s| quote!(#s,)).collect(); + + // Build the PHF map. This translates from strings to Symbol values. + let mut phf_map = phf_codegen::Map::<&str>::new(); + for (symbol_index, symbol) in symbol_strings.iter().enumerate() { + phf_map.entry(symbol, format!("Symbol::new({})", symbol_index as u32).as_str()); } - let _ = counter; // for future use + let phf_map_built = phf_map.build(); + let phf_map_text = phf_map_built.to_string(); + let phf_map_expr = syn::parse_str::(&phf_map_text).unwrap(); let output = quote! { const SYMBOL_DIGITS_BASE: u32 = #digits_base; @@ -215,13 +237,13 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec) { #symbols_stream } - impl Interner { - pub fn fresh() -> Self { - Interner::prefill(&[ - #prefill_stream - ]) - } - } + const DYNAMIC_SYMBOL_BASE: u32 = #dynamic_symbol_base; + + static STATIC_SYMBOLS: [&str; #static_symbols_len as usize] = [ + #symbol_strings_tokens + ]; + + static STATIC_SYMBOLS_PHF: ::phf::Map<&'static str, Symbol> = #phf_map_expr; }; (output, errors.list) diff --git a/compiler/rustc_span/Cargo.toml b/compiler/rustc_span/Cargo.toml index 08645990c4870..90b3c10f804dc 100644 --- a/compiler/rustc_span/Cargo.toml +++ b/compiler/rustc_span/Cargo.toml @@ -20,3 +20,4 @@ tracing = "0.1" sha-1 = "0.9" sha2 = "0.9" md-5 = "0.9" +phf = "0.8" diff --git a/compiler/rustc_span/src/lib.rs b/compiler/rustc_span/src/lib.rs index 50cb15554864f..143d73ab61249 100644 --- a/compiler/rustc_span/src/lib.rs +++ b/compiler/rustc_span/src/lib.rs @@ -90,7 +90,7 @@ pub struct SessionGlobals { impl SessionGlobals { pub fn new(edition: Edition) -> SessionGlobals { SessionGlobals { - symbol_interner: Lock::new(symbol::Interner::fresh()), + symbol_interner: Lock::new(symbol::Interner::default()), span_interner: Lock::new(span_encoding::SpanInterner::default()), hygiene_data: Lock::new(hygiene::HygieneData::new(edition)), source_map: Lock::new(None), diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index 7b90e5b611cd1..8839897fcb3cd 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -1452,17 +1452,55 @@ impl Symbol { Symbol(SymbolIndex::from_u32(n)) } + /// Maps a string to its interned representation, but only if this string is a known + /// (static) symbol. + pub fn intern_static(string: &str) -> Option { + if let Some(symbol) = STATIC_SYMBOLS_PHF.get(string) { Some(*symbol) } else { None } + } + /// Maps a string to its interned representation. + // #[inline(never)] - There is no benefit to inlining this function (verified with + // performance measurements), and a reduction in overall code size by disabling inlining. + #[inline(never)] pub fn intern(string: &str) -> Self { - with_interner(|interner| interner.intern(string)) + if let Some(symbol) = Symbol::intern_static(string) { + symbol + } else { + with_interner(|interner| interner.intern_dynamic(string)) + } + } + + pub fn is_static(self) -> bool { + self.0.as_u32() < DYNAMIC_SYMBOL_BASE + } + + /// Translates the `Symbol` to a string, but only if this `Symbol` + /// was originally interned as a static symbol. + pub fn as_str_static(self) -> Option<&'static str> { + let symbol_index = self.0.as_usize(); + if symbol_index < STATIC_SYMBOLS.len() { + // This is a well-known symbol. The symbol string is stored in a static field. + // There is no need to lock the interner. + Some(STATIC_SYMBOLS[symbol_index]) + } else { + None + } } /// Convert to a `SymbolStr`. This is a slowish operation because it /// requires locking the symbol interner. + /// + /// If the symbol is a statically-interned symbol (interned at rustc compile time), + /// then this operation is fast, and does not acquire any locks. pub fn as_str(self) -> SymbolStr { - with_interner(|interner| unsafe { - SymbolStr { string: std::mem::transmute::<&str, &str>(interner.get(self)) } - }) + if let Some(string) = self.as_str_static() { + SymbolStr { string } + } else { + // This is a dynamic string. The string is stored in the Interner. + with_interner(|interner| unsafe { + SymbolStr { string: std::mem::transmute::<&str, &str>(interner.get_dynamic(self)) } + }) + } } pub fn as_u32(self) -> u32 { @@ -1528,6 +1566,13 @@ impl ToStableHashKey for Symbol { // The `FxHashMap`+`Vec` pair could be replaced by `FxIndexSet`, but #75278 // found that to regress performance up to 2% in some cases. This might be // revisited after further improvements to `indexmap`. +// +// `Interner` does not contain any of the statically-known symbol names. +// It does not contain any of the strings defined in the `Keyword` or +// `Symbol` sections. Since those strings are statically-known, we just +// look them up in a (static) table, when needed. See +// `STATIC_SYMBOLS` and `STATIC_SYMBOLS_PHF`, which are both generated by +// `compiler/rustc_macros/src/symbols.rs`. #[derive(Default)] pub struct Interner { arena: DroplessArena, @@ -1536,21 +1581,28 @@ pub struct Interner { } impl Interner { - fn prefill(init: &[&'static str]) -> Self { - Interner { - strings: init.into(), - names: init.iter().copied().zip((0..).map(Symbol::new)).collect(), - ..Default::default() + pub fn intern(&mut self, string: &str) -> Symbol { + if let Some(sym) = Symbol::intern_static(string) { + sym + } else { + self.intern_dynamic(string) } } - #[inline] - pub fn intern(&mut self, string: &str) -> Symbol { + fn intern_dynamic(&mut self, string: &str) -> Symbol { + // The caller should have already checked for static symbols. + // Failure to do so is a bug, since this code will mistakenly + // intern the static symbol, resulting in a bogus symbol index. + // (The whole point of this design is that you can do static + // lookups without acquiring the thread-local Interner, so if + // we got here with a static symbol, we goofed.) + debug_assert!(Symbol::intern_static(string).is_none()); + if let Some(&name) = self.names.get(string) { return name; } - let name = Symbol::new(self.strings.len() as u32); + let name = Symbol::new(DYNAMIC_SYMBOL_BASE + self.strings.len() as u32); // `from_utf8_unchecked` is safe since we just allocated a `&str` which is known to be // UTF-8. @@ -1567,7 +1619,16 @@ impl Interner { // Get the symbol as a string. `Symbol::as_str()` should be used in // preference to this function. pub fn get(&self, symbol: Symbol) -> &str { - self.strings[symbol.0.as_usize()] + if let Some(string) = symbol.as_str_static() { + string + } else { + &self.strings[(symbol.as_u32() - DYNAMIC_SYMBOL_BASE) as usize] + } + } + + fn get_dynamic(&self, symbol: Symbol) -> &str { + debug_assert!(!symbol.is_static()); + self.strings[(symbol.as_u32() - DYNAMIC_SYMBOL_BASE) as usize] } } diff --git a/compiler/rustc_span/src/symbol/tests.rs b/compiler/rustc_span/src/symbol/tests.rs index 47da03424b770..f4102089ddb9d 100644 --- a/compiler/rustc_span/src/symbol/tests.rs +++ b/compiler/rustc_span/src/symbol/tests.rs @@ -5,15 +5,14 @@ use crate::{edition, SessionGlobals}; #[test] fn interner_tests() { let mut i: Interner = Interner::default(); - // first one is zero: - assert_eq!(i.intern("dog"), Symbol::new(0)); + let dog = i.intern("dog"); // re-use gets the same entry: - assert_eq!(i.intern("dog"), Symbol::new(0)); + assert_eq!(i.intern("dog").as_u32(), dog.as_u32()); // different string gets a different #: - assert_eq!(i.intern("cat"), Symbol::new(1)); - assert_eq!(i.intern("cat"), Symbol::new(1)); - // dog is still at zero - assert_eq!(i.intern("dog"), Symbol::new(0)); + let cat = i.intern("cat"); + assert_ne!(dog.as_u32(), cat.as_u32()); + assert_eq!(i.intern("cat").as_u32(), cat.as_u32()); + assert_eq!(i.intern("dog").as_u32(), dog.as_u32()); } #[test] @@ -23,3 +22,68 @@ fn without_first_quote_test() { assert_eq!(i.without_first_quote().name, kw::Break); }); } + +#[test] +fn test_static_symbols() { + assert_eq!(Symbol::intern_static(""), Some(kw::Invalid)); + assert_eq!(Symbol::intern_static("not in the static table"), None); + assert!(Symbol::intern_static("fn").is_some()); // don't care about exact index + + // check round-tripping + for &string in ["as", "fn", "let", "trait", "size_of_val"].iter() { + let sym = Symbol::intern_static(string).unwrap(); + assert_eq!(string, &*sym.as_str(), "sym #{}", sym.0.as_u32()); + } +} + +#[test] +fn test_ident_is_special() { + for &s in [kw::Invalid, kw::PathRoot, kw::DollarCrate, kw::Underscore].iter() { + let ident = Ident::with_dummy_span(s); + assert_eq!(ident.is_special(), true, "s = {:?}", s); + } + + for &s in [kw::As, kw::Break, kw::UnderscoreLifetime].iter() { + let ident = Ident::with_dummy_span(s); + assert_eq!(ident.is_special(), false, "s = {:?}", s); + } +} + +#[test] +fn test_symbol_as_str() { + SESSION_GLOBALS.set(&SessionGlobals::new(edition::Edition::Edition2018), || { + for &(sym, string) in [ + (kw::Invalid, ""), + (kw::PathRoot, "{{root}}"), + (kw::DollarCrate, "$crate"), + (kw::As, "as"), + (kw::Break, "break"), + (kw::While, "while"), + (kw::Union, "union"), + (sym::Alignment, "Alignment"), + (sym::Arc, "Arc"), + (sym::zmm_reg, "zmm_reg"), + (sym::i64, "i64"), + ] + .iter() + { + let as_str = sym.as_str(); + assert_eq!(&*as_str, string); + + let sym2 = Symbol::intern(string); + assert_eq!(sym, sym2, "sym={} sym2={}", sym.as_u32(), sym2.as_u32()); + } + + let colon = Symbol::intern(":"); + assert_eq!(&*colon.as_str(), ":"); + }); +} + +#[test] +fn test_dynamic_symbols() { + crate::with_session_globals(crate::edition::Edition::Edition2018, || { + let s1 = Symbol::intern("fuzzy wuzzy"); + assert!(!s1.is_static()); + assert_eq!(&*s1.as_str(), "fuzzy wuzzy"); + }); +} diff --git a/src/tools/tidy/src/deps.rs b/src/tools/tidy/src/deps.rs index 4b521985ca1ad..733dc1e67fd30 100644 --- a/src/tools/tidy/src/deps.rs +++ b/src/tools/tidy/src/deps.rs @@ -132,6 +132,10 @@ const PERMITTED_DEPENDENCIES: &[&str] = &[ "parking_lot", "parking_lot_core", "pathdiff", + "phf", + "phf_codegen", + "phf_generator", + "phf_shared", "pkg-config", "polonius-engine", "ppv-lite86", @@ -163,6 +167,7 @@ const PERMITTED_DEPENDENCIES: &[&str] = &[ "serde_derive", "sha-1", "sha2", + "siphasher", "smallvec", "snap", "stable_deref_trait",