diff --git a/src/librustc_lint/non_ascii_idents.rs b/src/librustc_lint/non_ascii_idents.rs index 064b0255397ce..90bd7ad4acf71 100644 --- a/src/librustc_lint/non_ascii_idents.rs +++ b/src/librustc_lint/non_ascii_idents.rs @@ -1,9 +1,7 @@ use crate::{EarlyContext, EarlyLintPass, LintContext}; use rustc_ast::ast; use rustc_data_structures::fx::FxHashMap; -use rustc_span::symbol::{Ident, SymbolStr}; -use std::hash::{Hash, Hasher}; -use std::ops::Deref; +use rustc_span::symbol::SymbolStr; declare_lint! { pub NON_ASCII_IDENTS, @@ -19,158 +17,133 @@ declare_lint! { crate_level_only } -// FIXME: Change this to warn. declare_lint! { pub CONFUSABLE_IDENTS, - Allow, + Warn, "detects visually confusable pairs between identifiers", crate_level_only } declare_lint_pass!(NonAsciiIdents => [NON_ASCII_IDENTS, UNCOMMON_CODEPOINTS, CONFUSABLE_IDENTS]); -enum CowBoxSymStr { - Interned(SymbolStr), - Owned(Box), -} - -impl Deref for CowBoxSymStr { - type Target = str; - - fn deref(&self) -> &str { - match self { - CowBoxSymStr::Interned(interned) => interned, - CowBoxSymStr::Owned(ref owned) => owned, - } - } -} - -impl Hash for CowBoxSymStr { - #[inline] - fn hash(&self, state: &mut H) { - Hash::hash(&**self, state) - } -} - -impl PartialEq for CowBoxSymStr { - #[inline] - fn eq(&self, other: &CowBoxSymStr) -> bool { - PartialEq::eq(&**self, &**other) - } -} - -impl Eq for CowBoxSymStr {} - -fn calc_skeleton(symbol_str: SymbolStr, buffer: &'_ mut String) -> CowBoxSymStr { - use std::mem::swap; - use unicode_security::confusable_detection::skeleton; - buffer.clear(); - buffer.extend(skeleton(&symbol_str)); - if symbol_str == *buffer { - CowBoxSymStr::Interned(symbol_str) - } else { - let mut owned = String::new(); - swap(buffer, &mut owned); - CowBoxSymStr::Owned(owned.into_boxed_str()) - } -} - -fn is_in_ascii_confusable_closure(c: char) -> bool { - // FIXME: move this table to `unicode_security` crate. - // data here corresponds to Unicode 13. - const ASCII_CONFUSABLE_CLOSURE: &[(u64, u64)] = &[(0x00, 0x7f), (0xba, 0xba), (0x2080, 0x2080)]; - let c = c as u64; - for &(range_start, range_end) in ASCII_CONFUSABLE_CLOSURE { - if c >= range_start && c <= range_end { - return true; - } - } - false -} - -fn is_in_ascii_confusable_closure_relevant_list(c: char) -> bool { - // FIXME: move this table to `unicode_security` crate. - // data here corresponds to Unicode 13. - const ASCII_CONFUSABLE_CLOSURE_RELEVANT_LIST: &[u64] = &[ - 0x22, 0x25, 0x27, 0x2f, 0x30, 0x31, 0x49, 0x4f, 0x60, 0x6c, 0x6d, 0x6e, 0x72, 0x7c, 0xba, - 0x2080, - ]; - let c = c as u64; - for &item in ASCII_CONFUSABLE_CLOSURE_RELEVANT_LIST { - if c == item { - return true; - } - } - false -} - impl EarlyLintPass for NonAsciiIdents { fn check_crate(&mut self, cx: &EarlyContext<'_>, _: &ast::Crate) { use rustc_session::lint::Level; - if cx.builder.lint_level(CONFUSABLE_IDENTS).0 == Level::Allow { + use rustc_span::Span; + use unicode_security::GeneralSecurityProfile; + use utils::CowBoxSymStr; + + let check_non_ascii_idents = cx.builder.lint_level(NON_ASCII_IDENTS).0 != Level::Allow; + let check_uncommon_codepoints = + cx.builder.lint_level(UNCOMMON_CODEPOINTS).0 != Level::Allow; + let check_confusable_idents = cx.builder.lint_level(CONFUSABLE_IDENTS).0 != Level::Allow; + + if !check_non_ascii_idents && !check_uncommon_codepoints && !check_confusable_idents { return; } + + let mut has_non_ascii_idents = false; let symbols = cx.sess.parse_sess.symbol_gallery.symbols.lock(); - let mut symbol_strs_and_spans = Vec::with_capacity(symbols.len()); - let mut in_fast_path = true; - for (symbol, sp) in symbols.iter() { - // fast path + for (symbol, &sp) in symbols.iter() { let symbol_str = symbol.as_str(); - if !symbol_str.chars().all(is_in_ascii_confusable_closure) { - // fallback to slow path. - symbol_strs_and_spans.clear(); - in_fast_path = false; - break; + if symbol_str.is_ascii() { + continue; } - if symbol_str.chars().any(is_in_ascii_confusable_closure_relevant_list) { - symbol_strs_and_spans.push((symbol_str, *sp)); + has_non_ascii_idents = true; + cx.struct_span_lint(NON_ASCII_IDENTS, sp, |lint| { + lint.build("identifier contains non-ASCII characters").emit() + }); + if check_uncommon_codepoints + && !symbol_str.chars().all(GeneralSecurityProfile::identifier_allowed) + { + cx.struct_span_lint(UNCOMMON_CODEPOINTS, sp, |lint| { + lint.build("identifier contains uncommon Unicode codepoints").emit() + }) } } - if !in_fast_path { - // slow path - for (symbol, sp) in symbols.iter() { + + if has_non_ascii_idents && check_confusable_idents { + let mut skeleton_map: FxHashMap = + FxHashMap::with_capacity_and_hasher(symbols.len(), Default::default()); + let mut str_buf = String::new(); + for (symbol, &sp) in symbols.iter() { + fn calc_skeleton(symbol_str: &SymbolStr, buffer: &mut String) -> CowBoxSymStr { + use std::mem::replace; + use unicode_security::confusable_detection::skeleton; + buffer.clear(); + buffer.extend(skeleton(symbol_str)); + if *symbol_str == *buffer { + CowBoxSymStr::Interned(symbol_str.clone()) + } else { + let owned = replace(buffer, String::new()); + CowBoxSymStr::Owned(owned.into_boxed_str()) + } + } let symbol_str = symbol.as_str(); - symbol_strs_and_spans.push((symbol_str, *sp)); + let is_ascii = symbol_str.is_ascii(); + let skeleton = calc_skeleton(&symbol_str, &mut str_buf); + skeleton_map + .entry(skeleton) + .and_modify(|(existing_symbolstr, existing_span, existing_is_ascii)| { + if !*existing_is_ascii || !is_ascii { + cx.struct_span_lint(CONFUSABLE_IDENTS, sp, |lint| { + lint.build(&format!( + "identifier pair considered confusable between `{}` and `{}`", + existing_symbolstr, symbol_str + )) + .span_label( + *existing_span, + "this is where the previous identifier occurred", + ) + .emit(); + }); + } + if *existing_is_ascii && !is_ascii { + *existing_symbolstr = symbol_str.clone(); + *existing_span = sp; + *existing_is_ascii = is_ascii; + } + }) + .or_insert((symbol_str, sp, is_ascii)); } } - drop(symbols); - symbol_strs_and_spans.sort_by_key(|x| x.0.clone()); - let mut skeleton_map = - FxHashMap::with_capacity_and_hasher(symbol_strs_and_spans.len(), Default::default()); - let mut str_buf = String::new(); - for (symbol_str, sp) in symbol_strs_and_spans { - let skeleton = calc_skeleton(symbol_str.clone(), &mut str_buf); - skeleton_map - .entry(skeleton) - .and_modify(|(existing_symbolstr, existing_span)| { - cx.struct_span_lint(CONFUSABLE_IDENTS, sp, |lint| { - lint.build(&format!( - "identifier pair considered confusable between `{}` and `{}`", - existing_symbolstr, symbol_str - )) - .span_label( - *existing_span, - "this is where the previous identifier occurred", - ) - .emit(); - }); - }) - .or_insert((symbol_str, sp)); + } +} + +mod utils { + use rustc_span::symbol::SymbolStr; + use std::hash::{Hash, Hasher}; + use std::ops::Deref; + + pub(super) enum CowBoxSymStr { + Interned(SymbolStr), + Owned(Box), + } + + impl Deref for CowBoxSymStr { + type Target = str; + + fn deref(&self) -> &str { + match self { + CowBoxSymStr::Interned(interned) => interned, + CowBoxSymStr::Owned(ref owned) => owned, + } } } - fn check_ident(&mut self, cx: &EarlyContext<'_>, ident: Ident) { - use unicode_security::GeneralSecurityProfile; - let name_str = ident.name.as_str(); - if name_str.is_ascii() { - return; + + impl Hash for CowBoxSymStr { + #[inline] + fn hash(&self, state: &mut H) { + Hash::hash(&**self, state) } - cx.struct_span_lint(NON_ASCII_IDENTS, ident.span, |lint| { - lint.build("identifier contains non-ASCII characters").emit() - }); - if !name_str.chars().all(GeneralSecurityProfile::identifier_allowed) { - cx.struct_span_lint(UNCOMMON_CODEPOINTS, ident.span, |lint| { - lint.build("identifier contains uncommon Unicode codepoints").emit() - }) + } + + impl PartialEq for CowBoxSymStr { + #[inline] + fn eq(&self, other: &CowBoxSymStr) -> bool { + PartialEq::eq(&**self, &**other) } } + + impl Eq for CowBoxSymStr {} } diff --git a/src/librustc_session/parse.rs b/src/librustc_session/parse.rs index ddbc95fb1b0b8..f4e5da4d54f46 100644 --- a/src/librustc_session/parse.rs +++ b/src/librustc_session/parse.rs @@ -13,6 +13,7 @@ use rustc_span::hygiene::ExpnId; use rustc_span::source_map::{FilePathMapping, SourceMap}; use rustc_span::{MultiSpan, Span, Symbol}; +use std::collections::BTreeMap; use std::path::PathBuf; use std::str; @@ -63,7 +64,7 @@ impl GatedSpans { #[derive(Default)] pub struct SymbolGallery { /// All symbols occurred and their first occurrance span. - pub symbols: Lock>, + pub symbols: Lock>, } impl SymbolGallery { diff --git a/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-confusable-idents.rs b/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-confusable-idents.rs index 12093837d2630..e15ed2e70b896 100644 --- a/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-confusable-idents.rs +++ b/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-confusable-idents.rs @@ -2,8 +2,14 @@ #![deny(confusable_idents)] #![allow(uncommon_codepoints, non_upper_case_globals)] -const s: usize = 42; //~ ERROR identifier pair considered confusable +const s: usize = 42; fn main() { - let s = "rust"; + let s = "rust"; //~ ERROR identifier pair considered confusable + not_affected(); +} + +fn not_affected() { + let s1 = 1; + let sl = 'l'; } diff --git a/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-confusable-idents.stderr b/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-confusable-idents.stderr index 40ee18acb3cd4..218f94f7b5829 100644 --- a/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-confusable-idents.stderr +++ b/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-confusable-idents.stderr @@ -1,11 +1,11 @@ -error: identifier pair considered confusable between `s` and `s` - --> $DIR/lint-confusable-idents.rs:5:7 +error: identifier pair considered confusable between `s` and `s` + --> $DIR/lint-confusable-idents.rs:8:9 | LL | const s: usize = 42; - | ^^ + | -- this is where the previous identifier occurred ... LL | let s = "rust"; - | - this is where the previous identifier occurred + | ^ | note: the lint level is defined here --> $DIR/lint-confusable-idents.rs:2:9 diff --git a/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-non-ascii-idents.rs b/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-non-ascii-idents.rs index 057329a0a650c..20d00cf701a15 100644 --- a/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-non-ascii-idents.rs +++ b/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-non-ascii-idents.rs @@ -7,5 +7,7 @@ fn coöperation() {} //~ ERROR identifier contains non-ASCII characters fn main() { let naïveté = 2; //~ ERROR identifier contains non-ASCII characters - println!("{}", naïveté); //~ ERROR identifier contains non-ASCII characters + + // using the same identifier the second time won't trigger the lint. + println!("{}", naïveté); } diff --git a/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-non-ascii-idents.stderr b/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-non-ascii-idents.stderr index 6c9f0866c017a..048b6ff5d687f 100644 --- a/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-non-ascii-idents.stderr +++ b/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-non-ascii-idents.stderr @@ -22,11 +22,5 @@ error: identifier contains non-ASCII characters LL | let naïveté = 2; | ^^^^^^^ -error: identifier contains non-ASCII characters - --> $DIR/lint-non-ascii-idents.rs:10:20 - | -LL | println!("{}", naïveté); - | ^^^^^^^ - -error: aborting due to 4 previous errors +error: aborting due to 3 previous errors diff --git a/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.rs b/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.rs index 7ac0d035d5bf1..b5e251e047b5a 100644 --- a/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.rs +++ b/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.rs @@ -7,5 +7,7 @@ fn dijkstra() {} //~ ERROR identifier contains uncommon Unicode codepoints fn main() { let ㇻㇲㇳ = "rust"; //~ ERROR identifier contains uncommon Unicode codepoints - println!("{}", ㇻㇲㇳ); //~ ERROR identifier contains uncommon Unicode codepoints + + // using the same identifier the second time won't trigger the lint. + println!("{}", ㇻㇲㇳ); } diff --git a/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.stderr b/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.stderr index b270bd1f051c2..05ea3d5de7dbc 100644 --- a/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.stderr +++ b/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.stderr @@ -22,11 +22,5 @@ error: identifier contains uncommon Unicode codepoints LL | let ㇻㇲㇳ = "rust"; | ^^^^^^ -error: identifier contains uncommon Unicode codepoints - --> $DIR/lint-uncommon-codepoints.rs:10:20 - | -LL | println!("{}", ㇻㇲㇳ); - | ^^^^^^ - -error: aborting due to 4 previous errors +error: aborting due to 3 previous errors diff --git a/src/test/ui/parser/issue-62524.rs b/src/test/ui/parser/issue-62524.rs index 57de4b87b0fe6..5259dfe2e656c 100644 --- a/src/test/ui/parser/issue-62524.rs +++ b/src/test/ui/parser/issue-62524.rs @@ -1,4 +1,6 @@ // ignore-tidy-trailing-newlines // error-pattern: aborting due to 3 previous errors +#![allow(uncommon_codepoints)] + y![ Ϥ, \ No newline at end of file diff --git a/src/test/ui/parser/issue-62524.stderr b/src/test/ui/parser/issue-62524.stderr index 8191c9682cefd..d5e07622b11b9 100644 --- a/src/test/ui/parser/issue-62524.stderr +++ b/src/test/ui/parser/issue-62524.stderr @@ -1,5 +1,5 @@ error: this file contains an unclosed delimiter - --> $DIR/issue-62524.rs:4:3 + --> $DIR/issue-62524.rs:6:3 | LL | y![ | - unclosed delimiter @@ -7,7 +7,7 @@ LL | Ϥ, | ^ error: macros that expand to items must be delimited with braces or followed by a semicolon - --> $DIR/issue-62524.rs:3:3 + --> $DIR/issue-62524.rs:5:3 | LL | y![ | ___^ @@ -24,7 +24,7 @@ LL | Ϥ,; | ^ error: cannot find macro `y` in this scope - --> $DIR/issue-62524.rs:3:1 + --> $DIR/issue-62524.rs:5:1 | LL | y![ | ^