Skip to content

Refactor and adjust non_ascii_idents lints. #72069

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 26, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
235 changes: 104 additions & 131 deletions src/librustc_lint/non_ascii_idents.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
use crate::{EarlyContext, EarlyLintPass, LintContext};
use rustc_ast::ast;
use rustc_data_structures::fx::FxHashMap;
use rustc_span::symbol::{Ident, SymbolStr};
use std::hash::{Hash, Hasher};
use std::ops::Deref;
use rustc_span::symbol::SymbolStr;

declare_lint! {
pub NON_ASCII_IDENTS,
Expand All @@ -19,158 +17,133 @@ declare_lint! {
crate_level_only
}

// FIXME: Change this to warn.
declare_lint! {
pub CONFUSABLE_IDENTS,
Allow,
Warn,
"detects visually confusable pairs between identifiers",
crate_level_only
}

declare_lint_pass!(NonAsciiIdents => [NON_ASCII_IDENTS, UNCOMMON_CODEPOINTS, CONFUSABLE_IDENTS]);

/// A string that is either an interned `SymbolStr` or an owned boxed `str`.
///
/// The type dereferences to `str`; hashing and equality are both delegated to
/// that `str` view, so the two variants hash and compare by content alone.
enum CowBoxSymStr {
    Interned(SymbolStr),
    Owned(Box<str>),
}

impl Deref for CowBoxSymStr {
    type Target = str;

    /// Borrows the text held by whichever variant is active.
    fn deref(&self) -> &str {
        match *self {
            CowBoxSymStr::Interned(ref symbol_text) => symbol_text,
            CowBoxSymStr::Owned(ref boxed_text) => boxed_text,
        }
    }
}

impl Hash for CowBoxSymStr {
    /// Hashes the underlying `str`, independent of the variant.
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        let text: &str = self;
        text.hash(state)
    }
}

impl PartialEq<CowBoxSymStr> for CowBoxSymStr {
    /// Two values are equal when their underlying `str` contents are equal.
    #[inline]
    fn eq(&self, other: &CowBoxSymStr) -> bool {
        self.deref() == other.deref()
    }
}

impl Eq for CowBoxSymStr {}

/// Computes the confusable-detection skeleton of `symbol_str`, using `buffer`
/// as scratch space.
///
/// `buffer` is cleared and then filled with the skeleton. When the skeleton is
/// equal to the symbol text, the symbol itself is returned as `Interned` and
/// `buffer` keeps its contents. Otherwise the buffer's contents are moved out
/// into an `Owned` boxed string and `buffer` is left as a new empty `String`.
fn calc_skeleton(symbol_str: SymbolStr, buffer: &'_ mut String) -> CowBoxSymStr {
    use unicode_security::confusable_detection::skeleton;

    buffer.clear();
    buffer.extend(skeleton(&symbol_str));

    if symbol_str == *buffer {
        return CowBoxSymStr::Interned(symbol_str);
    }

    // Move the skeleton text out, leaving an empty `String` behind for reuse.
    let skeleton_text = std::mem::take(buffer);
    CowBoxSymStr::Owned(skeleton_text.into_boxed_str())
}

/// Returns `true` when `c` falls inside one of the inclusive code point ranges
/// of the `ASCII_CONFUSABLE_CLOSURE` table (U+0000..=U+007F, U+00BA, U+2080).
fn is_in_ascii_confusable_closure(c: char) -> bool {
    // FIXME: move this table to `unicode_security` crate.
    // data here corresponds to Unicode 13.
    const ASCII_CONFUSABLE_CLOSURE: &[(u64, u64)] = &[(0x00, 0x7f), (0xba, 0xba), (0x2080, 0x2080)];
    let code_point = u64::from(u32::from(c));
    ASCII_CONFUSABLE_CLOSURE.iter().any(|&(first, last)| (first..=last).contains(&code_point))
}

/// Returns `true` when the code point of `c` is one of the entries of the
/// `ASCII_CONFUSABLE_CLOSURE_RELEVANT_LIST` table.
fn is_in_ascii_confusable_closure_relevant_list(c: char) -> bool {
    // FIXME: move this table to `unicode_security` crate.
    // data here corresponds to Unicode 13.
    const ASCII_CONFUSABLE_CLOSURE_RELEVANT_LIST: &[u64] = &[
        0x22, 0x25, 0x27, 0x2f, 0x30, 0x31, 0x49, 0x4f, 0x60, 0x6c, 0x6d, 0x6e, 0x72, 0x7c, 0xba,
        0x2080,
    ];
    let code_point = u64::from(u32::from(c));
    ASCII_CONFUSABLE_CLOSURE_RELEVANT_LIST.contains(&code_point)
}

impl EarlyLintPass for NonAsciiIdents {
fn check_crate(&mut self, cx: &EarlyContext<'_>, _: &ast::Crate) {
use rustc_session::lint::Level;
if cx.builder.lint_level(CONFUSABLE_IDENTS).0 == Level::Allow {
use rustc_span::Span;
use unicode_security::GeneralSecurityProfile;
use utils::CowBoxSymStr;

let check_non_ascii_idents = cx.builder.lint_level(NON_ASCII_IDENTS).0 != Level::Allow;
let check_uncommon_codepoints =
cx.builder.lint_level(UNCOMMON_CODEPOINTS).0 != Level::Allow;
let check_confusable_idents = cx.builder.lint_level(CONFUSABLE_IDENTS).0 != Level::Allow;

if !check_non_ascii_idents && !check_uncommon_codepoints && !check_confusable_idents {
return;
}

let mut has_non_ascii_idents = false;
let symbols = cx.sess.parse_sess.symbol_gallery.symbols.lock();
let mut symbol_strs_and_spans = Vec::with_capacity(symbols.len());
let mut in_fast_path = true;
for (symbol, sp) in symbols.iter() {
// fast path
for (symbol, &sp) in symbols.iter() {
let symbol_str = symbol.as_str();
if !symbol_str.chars().all(is_in_ascii_confusable_closure) {
// fallback to slow path.
symbol_strs_and_spans.clear();
in_fast_path = false;
break;
if symbol_str.is_ascii() {
continue;
}
if symbol_str.chars().any(is_in_ascii_confusable_closure_relevant_list) {
symbol_strs_and_spans.push((symbol_str, *sp));
has_non_ascii_idents = true;
cx.struct_span_lint(NON_ASCII_IDENTS, sp, |lint| {
lint.build("identifier contains non-ASCII characters").emit()
});
if check_uncommon_codepoints
&& !symbol_str.chars().all(GeneralSecurityProfile::identifier_allowed)
{
cx.struct_span_lint(UNCOMMON_CODEPOINTS, sp, |lint| {
lint.build("identifier contains uncommon Unicode codepoints").emit()
})
}
}
if !in_fast_path {
// slow path
for (symbol, sp) in symbols.iter() {

if has_non_ascii_idents && check_confusable_idents {
let mut skeleton_map: FxHashMap<CowBoxSymStr, (SymbolStr, Span, bool)> =
FxHashMap::with_capacity_and_hasher(symbols.len(), Default::default());
let mut str_buf = String::new();
for (symbol, &sp) in symbols.iter() {
fn calc_skeleton(symbol_str: &SymbolStr, buffer: &mut String) -> CowBoxSymStr {
use std::mem::replace;
use unicode_security::confusable_detection::skeleton;
buffer.clear();
buffer.extend(skeleton(symbol_str));
if *symbol_str == *buffer {
CowBoxSymStr::Interned(symbol_str.clone())
} else {
let owned = replace(buffer, String::new());
CowBoxSymStr::Owned(owned.into_boxed_str())
}
}
let symbol_str = symbol.as_str();
symbol_strs_and_spans.push((symbol_str, *sp));
let is_ascii = symbol_str.is_ascii();
let skeleton = calc_skeleton(&symbol_str, &mut str_buf);
skeleton_map
.entry(skeleton)
.and_modify(|(existing_symbolstr, existing_span, existing_is_ascii)| {
if !*existing_is_ascii || !is_ascii {
cx.struct_span_lint(CONFUSABLE_IDENTS, sp, |lint| {
lint.build(&format!(
"identifier pair considered confusable between `{}` and `{}`",
existing_symbolstr, symbol_str
))
.span_label(
*existing_span,
"this is where the previous identifier occurred",
)
.emit();
});
}
if *existing_is_ascii && !is_ascii {
*existing_symbolstr = symbol_str.clone();
*existing_span = sp;
*existing_is_ascii = is_ascii;
}
})
.or_insert((symbol_str, sp, is_ascii));
}
}
drop(symbols);
symbol_strs_and_spans.sort_by_key(|x| x.0.clone());
let mut skeleton_map =
FxHashMap::with_capacity_and_hasher(symbol_strs_and_spans.len(), Default::default());
let mut str_buf = String::new();
for (symbol_str, sp) in symbol_strs_and_spans {
let skeleton = calc_skeleton(symbol_str.clone(), &mut str_buf);
skeleton_map
.entry(skeleton)
.and_modify(|(existing_symbolstr, existing_span)| {
cx.struct_span_lint(CONFUSABLE_IDENTS, sp, |lint| {
lint.build(&format!(
"identifier pair considered confusable between `{}` and `{}`",
existing_symbolstr, symbol_str
))
.span_label(
*existing_span,
"this is where the previous identifier occurred",
)
.emit();
});
})
.or_insert((symbol_str, sp));
}
}

mod utils {
use rustc_span::symbol::SymbolStr;
use std::hash::{Hash, Hasher};
use std::ops::Deref;

pub(super) enum CowBoxSymStr {
Interned(SymbolStr),
Owned(Box<str>),
}

impl Deref for CowBoxSymStr {
type Target = str;

fn deref(&self) -> &str {
match self {
CowBoxSymStr::Interned(interned) => interned,
CowBoxSymStr::Owned(ref owned) => owned,
}
}
}
fn check_ident(&mut self, cx: &EarlyContext<'_>, ident: Ident) {
use unicode_security::GeneralSecurityProfile;
let name_str = ident.name.as_str();
if name_str.is_ascii() {
return;

impl Hash for CowBoxSymStr {
#[inline]
fn hash<H: Hasher>(&self, state: &mut H) {
Hash::hash(&**self, state)
}
cx.struct_span_lint(NON_ASCII_IDENTS, ident.span, |lint| {
lint.build("identifier contains non-ASCII characters").emit()
});
if !name_str.chars().all(GeneralSecurityProfile::identifier_allowed) {
cx.struct_span_lint(UNCOMMON_CODEPOINTS, ident.span, |lint| {
lint.build("identifier contains uncommon Unicode codepoints").emit()
})
}

impl PartialEq<CowBoxSymStr> for CowBoxSymStr {
#[inline]
fn eq(&self, other: &CowBoxSymStr) -> bool {
PartialEq::eq(&**self, &**other)
}
}

impl Eq for CowBoxSymStr {}
}
3 changes: 2 additions & 1 deletion src/librustc_session/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ use rustc_span::hygiene::ExpnId;
use rustc_span::source_map::{FilePathMapping, SourceMap};
use rustc_span::{MultiSpan, Span, Symbol};

use std::collections::BTreeMap;
use std::path::PathBuf;
use std::str;

Expand Down Expand Up @@ -63,7 +64,7 @@ impl GatedSpans {
#[derive(Default)]
pub struct SymbolGallery {
/// All symbols occurred and their first occurrence span.
pub symbols: Lock<FxHashMap<Symbol, Span>>,
pub symbols: Lock<BTreeMap<Symbol, Span>>,
}

impl SymbolGallery {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,14 @@
#![deny(confusable_idents)]
#![allow(uncommon_codepoints, non_upper_case_globals)]

const s: usize = 42; //~ ERROR identifier pair considered confusable
const s: usize = 42;

fn main() {
let s = "rust";
let s = "rust"; //~ ERROR identifier pair considered confusable
not_affected();
}

fn not_affected() {
let s1 = 1;
let sl = 'l';
}
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
error: identifier pair considered confusable between `s` and `ｓ`
--> $DIR/lint-confusable-idents.rs:5:7
error: identifier pair considered confusable between `ｓ` and `s`
--> $DIR/lint-confusable-idents.rs:8:9
|
LL | const s: usize = 42;
| ^^
| -- this is where the previous identifier occurred
...
LL | let s = "rust";
| - this is where the previous identifier occurred
| ^
|
note: the lint level is defined here
--> $DIR/lint-confusable-idents.rs:2:9
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,7 @@ fn coöperation() {} //~ ERROR identifier contains non-ASCII characters

fn main() {
let naïveté = 2; //~ ERROR identifier contains non-ASCII characters
println!("{}", naïveté); //~ ERROR identifier contains non-ASCII characters

// using the same identifier the second time won't trigger the lint.
println!("{}", naïveté);
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,5 @@ error: identifier contains non-ASCII characters
LL | let naïveté = 2;
| ^^^^^^^

error: identifier contains non-ASCII characters
--> $DIR/lint-non-ascii-idents.rs:10:20
|
LL | println!("{}", naïveté);
| ^^^^^^^

error: aborting due to 4 previous errors
error: aborting due to 3 previous errors

Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,7 @@ fn dijkstra() {} //~ ERROR identifier contains uncommon Unicode codepoints

fn main() {
let ㇻㇲㇳ = "rust"; //~ ERROR identifier contains uncommon Unicode codepoints
println!("{}", ㇻㇲㇳ); //~ ERROR identifier contains uncommon Unicode codepoints

// using the same identifier the second time won't trigger the lint.
println!("{}", ㇻㇲㇳ);
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,5 @@ error: identifier contains uncommon Unicode codepoints
LL | let ㇻㇲㇳ = "rust";
| ^^^^^^

error: identifier contains uncommon Unicode codepoints
--> $DIR/lint-uncommon-codepoints.rs:10:20
|
LL | println!("{}", ㇻㇲㇳ);
| ^^^^^^

error: aborting due to 4 previous errors
error: aborting due to 3 previous errors

2 changes: 2 additions & 0 deletions src/test/ui/parser/issue-62524.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
// ignore-tidy-trailing-newlines
// error-pattern: aborting due to 3 previous errors
#![allow(uncommon_codepoints)]

y![
Ϥ,
6 changes: 3 additions & 3 deletions src/test/ui/parser/issue-62524.stderr
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
error: this file contains an unclosed delimiter
--> $DIR/issue-62524.rs:4:3
--> $DIR/issue-62524.rs:6:3
|
LL | y![
| - unclosed delimiter
LL | Ϥ,
| ^

error: macros that expand to items must be delimited with braces or followed by a semicolon
--> $DIR/issue-62524.rs:3:3
--> $DIR/issue-62524.rs:5:3
|
LL | y![
| ___^
Expand All @@ -24,7 +24,7 @@ LL | Ϥ,;
| ^

error: cannot find macro `y` in this scope
--> $DIR/issue-62524.rs:3:1
--> $DIR/issue-62524.rs:5:1
|
LL | y![
| ^
Expand Down