Skip to content

Commit da2b694

Browse files
committed
Restore old algorithm with optimize_for_size, slightly refactor to share some code (doesn't seem to affect benchmarks)
1 parent 17b0a02 commit da2b694

File tree

1 file changed

+94
-55
lines changed

1 file changed

+94
-55
lines changed

library/core/src/escape.rs

+94-55
Original file line numberDiff line numberDiff line change
@@ -18,71 +18,110 @@ const fn backslash<const N: usize>(a: ascii::Char) -> ([ascii::Char; N], Range<u
1818
(output, 0..2)
1919
}
2020

21+
#[inline]
22+
const fn hex_escape<const N: usize>(byte: u8) -> ([ascii::Char; N], Range<u8>) {
23+
const { assert!(N >= 4) };
24+
25+
let mut output = [ascii::Char::Null; N];
26+
27+
let hi = HEX_DIGITS[(byte >> 4) as usize];
28+
let lo = HEX_DIGITS[(byte & 0xf) as usize];
29+
30+
output[0] = ascii::Char::ReverseSolidus;
31+
output[1] = ascii::Char::SmallX;
32+
output[2] = hi;
33+
output[3] = lo;
34+
35+
(output, 0..4)
36+
}
37+
38+
#[inline]
39+
const fn verbatim<const N: usize>(a: ascii::Char) -> ([ascii::Char; N], Range<u8>) {
40+
const { assert!(N >= 1) };
41+
42+
let mut output = [ascii::Char::Null; N];
43+
44+
output[0] = a;
45+
46+
(output, 0..1)
47+
}
48+
2149
/// Escapes an ASCII character.
2250
///
2351
/// Returns a buffer and the length of the escaped representation.
2452
const fn escape_ascii<const N: usize>(byte: u8) -> ([ascii::Char; N], Range<u8>) {
2553
const { assert!(N >= 4) };
2654

27-
/// Lookup table helps us determine how to display character.
28-
///
29-
/// Since ASCII characters will always be 7 bits, we can exploit this to store the 8th bit to
30-
/// indicate whether the result is escaped or unescaped.
31-
///
32-
/// We additionally use 0x80 (escaped NUL character) to indicate hex-escaped bytes, since
33-
/// escaped NUL will not occur.
34-
const LOOKUP: [u8; 256] = {
35-
let mut arr = [0; 256];
36-
let mut idx = 0;
37-
loop {
38-
arr[idx as usize] = match idx {
39-
// use 8th bit to indicate escaped
40-
b'\t' => 0x80 | b't',
41-
b'\r' => 0x80 | b'r',
42-
b'\n' => 0x80 | b'n',
43-
b'\\' => 0x80 | b'\\',
44-
b'\'' => 0x80 | b'\'',
45-
b'"' => 0x80 | b'"',
46-
47-
// use NUL to indicate hex-escaped
48-
0x00..=0x1F | 0x7F..=0xFF => 0x80 | b'\0',
49-
50-
_ => idx,
51-
};
52-
if idx == 255 {
53-
break;
54-
}
55-
idx += 1;
55+
#[cfg(feature = "optimize_for_size")]
56+
{
57+
match byte {
58+
b'\t' => backslash(ascii::Char::SmallT),
59+
b'\r' => backslash(ascii::Char::SmallR),
60+
b'\n' => backslash(ascii::Char::SmallN),
61+
b'\\' => backslash(ascii::Char::ReverseSolidus),
62+
b'\'' => backslash(ascii::Char::Apostrophe),
63+
b'\"' => backslash(ascii::Char::QuotationMark),
64+
0x00..=0x1F => hex_escape(byte),
65+
_ => match ascii::Char::from_u8(byte) {
66+
Some(a) => verbatim(a),
67+
None => hex_escape(byte),
68+
},
5669
}
57-
arr
58-
};
70+
}
5971

60-
let mut output = [ascii::Char::Null; N];
61-
let lookup = LOOKUP[byte as usize];
62-
63-
// 8th bit indicates escape
64-
if lookup & 0x80 != 0 {
65-
output[0] = ascii::Char::ReverseSolidus;
66-
67-
// SAFETY: We explicitly mask out the eighth bit.
68-
let lookup = unsafe { ascii::Char::from_u8_unchecked(lookup & 0x7F) };
69-
70-
// NUL indicates hex-escaped
71-
if matches!(lookup, ascii::Char::Null) {
72-
let hi = HEX_DIGITS[(byte >> 4) as usize];
73-
let lo = HEX_DIGITS[(byte & 0xF) as usize];
74-
output[1] = ascii::Char::SmallX;
75-
output[2] = hi;
76-
output[3] = lo;
77-
(output, 0..4)
72+
#[cfg(not(feature = "optimize_for_size"))]
73+
{
74+
/// Lookup table helps us determine how to display character.
75+
///
76+
/// Since ASCII characters will always be 7 bits, we can exploit this to store the 8th bit to
77+
/// indicate whether the result is escaped or unescaped.
78+
///
79+
/// We additionally use 0x80 (escaped NUL character) to indicate hex-escaped bytes, since
80+
/// escaped NUL will not occur.
81+
const LOOKUP: [u8; 256] = {
82+
let mut arr = [0; 256];
83+
let mut idx = 0;
84+
loop {
85+
arr[idx as usize] = match idx {
86+
// use 8th bit to indicate escaped
87+
b'\t' => 0x80 | b't',
88+
b'\r' => 0x80 | b'r',
89+
b'\n' => 0x80 | b'n',
90+
b'\\' => 0x80 | b'\\',
91+
b'\'' => 0x80 | b'\'',
92+
b'"' => 0x80 | b'"',
93+
94+
// use NUL to indicate hex-escaped
95+
0x00..=0x1F | 0x7F..=0xFF => 0x80 | b'\0',
96+
97+
_ => idx,
98+
};
99+
if idx == 255 {
100+
break;
101+
}
102+
idx += 1;
103+
}
104+
arr
105+
};
106+
107+
let lookup = LOOKUP[byte as usize];
108+
109+
// 8th bit indicates escape
110+
let lookup_escaped = lookup & 0x80 != 0;
111+
112+
// SAFETY: We explicitly mask out the eighth bit to get a 7-bit ASCII character.
113+
let lookup_ascii = unsafe { ascii::Char::from_u8_unchecked(lookup & 0x7F) };
114+
115+
if lookup_escaped {
116+
// NUL indicates hex-escaped
117+
if matches!(lookup_ascii, ascii::Char::Null) {
118+
hex_escape(byte)
119+
} else {
120+
backslash(lookup_ascii)
121+
}
78122
} else {
79-
output[1] = lookup;
80-
(output, 0..2)
123+
verbatim(lookup_ascii)
81124
}
82-
} else {
83-
// SAFETY: We explicitly checked for the eighth bit.
84-
output[0] = unsafe { ascii::Char::from_u8_unchecked(lookup) };
85-
(output, 0..1)
86125
}
87126
}
88127

0 commit comments

Comments
 (0)