Skip to content

Commit 17b0a02

Browse files
committed
Optimize escape_ascii
1 parent 4074d49 commit 17b0a02

File tree

1 file changed

+57
-25
lines changed

1 file changed

+57
-25
lines changed

library/core/src/escape.rs

+57-25
Original file line numberDiff line numberDiff line change
@@ -24,33 +24,65 @@ const fn backslash<const N: usize>(a: ascii::Char) -> ([ascii::Char; N], Range<u
2424
const fn escape_ascii<const N: usize>(byte: u8) -> ([ascii::Char; N], Range<u8>) {
2525
const { assert!(N >= 4) };
2626

27-
match byte {
28-
b'\t' => backslash(ascii::Char::SmallT),
29-
b'\r' => backslash(ascii::Char::SmallR),
30-
b'\n' => backslash(ascii::Char::SmallN),
31-
b'\\' => backslash(ascii::Char::ReverseSolidus),
32-
b'\'' => backslash(ascii::Char::Apostrophe),
33-
b'\"' => backslash(ascii::Char::QuotationMark),
34-
byte => {
35-
let mut output = [ascii::Char::Null; N];
36-
37-
if let Some(c) = byte.as_ascii()
38-
&& !byte.is_ascii_control()
39-
{
40-
output[0] = c;
41-
(output, 0..1)
42-
} else {
43-
let hi = HEX_DIGITS[(byte >> 4) as usize];
44-
let lo = HEX_DIGITS[(byte & 0xf) as usize];
45-
46-
output[0] = ascii::Char::ReverseSolidus;
47-
output[1] = ascii::Char::SmallX;
48-
output[2] = hi;
49-
output[3] = lo;
50-
51-
(output, 0..4)
27+
/// Lookup table that determines how each byte is displayed.
28+
///
29+
/// Since ASCII characters always fit in 7 bits, we can use the 8th bit to
30+
/// indicate whether the result is escaped or unescaped.
31+
///
32+
/// We additionally use 0x80 (escaped NUL character) to indicate hex-escaped bytes, since
33+
/// escaped NUL will not occur.
34+
const LOOKUP: [u8; 256] = {
35+
let mut arr = [0; 256];
36+
let mut idx = 0;
37+
loop {
38+
arr[idx as usize] = match idx {
39+
// use 8th bit to indicate escaped
40+
b'\t' => 0x80 | b't',
41+
b'\r' => 0x80 | b'r',
42+
b'\n' => 0x80 | b'n',
43+
b'\\' => 0x80 | b'\\',
44+
b'\'' => 0x80 | b'\'',
45+
b'"' => 0x80 | b'"',
46+
47+
// use NUL to indicate hex-escaped
48+
0x00..=0x1F | 0x7F..=0xFF => 0x80 | b'\0',
49+
50+
_ => idx,
51+
};
52+
if idx == 255 {
53+
break;
5254
}
55+
idx += 1;
5356
}
57+
arr
58+
};
59+
60+
let mut output = [ascii::Char::Null; N];
61+
let lookup = LOOKUP[byte as usize];
62+
63+
// 8th bit indicates escape
64+
if lookup & 0x80 != 0 {
65+
output[0] = ascii::Char::ReverseSolidus;
66+
67+
// SAFETY: We explicitly mask out the eighth bit.
68+
let lookup = unsafe { ascii::Char::from_u8_unchecked(lookup & 0x7F) };
69+
70+
// NUL indicates hex-escaped
71+
if matches!(lookup, ascii::Char::Null) {
72+
let hi = HEX_DIGITS[(byte >> 4) as usize];
73+
let lo = HEX_DIGITS[(byte & 0xF) as usize];
74+
output[1] = ascii::Char::SmallX;
75+
output[2] = hi;
76+
output[3] = lo;
77+
(output, 0..4)
78+
} else {
79+
output[1] = lookup;
80+
(output, 0..2)
81+
}
82+
} else {
83+
// SAFETY: The eighth bit was checked to be clear above, so `lookup` is valid ASCII.
84+
output[0] = unsafe { ascii::Char::from_u8_unchecked(lookup) };
85+
(output, 0..1)
5486
}
5587
}
5688

0 commit comments

Comments
 (0)