Restore old algorithm with optimize_for_size, slightly refactor to share some code (doesn't seem to affect benchmarks)

clarfonthey · clarfonthey · commit da2b6947e143 · 2024-09-01T15:32:36.000-04:00
diff --git a/library/core/src/escape.rs b/library/core/src/escape.rs
@@ -18,71 +18,110 @@ const fn backslash<const N: usize>(a: ascii::Char) -> ([ascii::Char; N], Range<u
     (output, 0..2)
 }
 
+/// Builds the four-character hex escape (`\x` plus two `HEX_DIGITS` entries) for `byte`.
+///
+/// Returns the `Null`-padded buffer together with the range `0..4` holding the escape.
+#[inline]
+const fn hex_escape<const N: usize>(byte: u8) -> ([ascii::Char; N], Range<u8>) {
+    const { assert!(N >= 4) };
+
+    let high_nibble = (byte >> 4) as usize;
+    let low_nibble = (byte & 0xf) as usize;
+    let mut buf = [ascii::Char::Null; N];
+    buf[0] = ascii::Char::ReverseSolidus;
+    buf[1] = ascii::Char::SmallX;
+    buf[2] = HEX_DIGITS[high_nibble];
+    buf[3] = HEX_DIGITS[low_nibble];
+    (buf, 0..4)
+}
+
+/// Places `a` unescaped in the first slot of an otherwise `Null`-filled buffer.
+///
+/// Returns that buffer together with the range `0..1`.
+#[inline]
+const fn verbatim<const N: usize>(a: ascii::Char) -> ([ascii::Char; N], Range<u8>) {
+    const { assert!(N >= 1) };
+    let mut buf = [ascii::Char::Null; N];
+    buf[0] = a;
+    (buf, 0..1)
+}
+
 /// Escapes an ASCII character.
 ///
 /// Returns a buffer and the length of the escaped representation.
 const fn escape_ascii<const N: usize>(byte: u8) -> ([ascii::Char; N], Range<u8>) {
     const { assert!(N >= 4) };
 
-    /// Lookup table helps us determine how to display character.
-    ///
-    /// Since ASCII characters will always be 7 bits, we can exploit this to store the 8th bit to
-    /// indicate whether the result is escaped or unescaped.
-    ///
-    /// We additionally use 0x80 (escaped NUL character) to indicate hex-escaped bytes, since
-    /// escaped NUL will not occur.
-    const LOOKUP: [u8; 256] = {
-        let mut arr = [0; 256];
-        let mut idx = 0;
-        loop {
-            arr[idx as usize] = match idx {
-                // use 8th bit to indicate escaped
-                b'\t' => 0x80 | b't',
-                b'\r' => 0x80 | b'r',
-                b'\n' => 0x80 | b'n',
-                b'\\' => 0x80 | b'\\',
-                b'\'' => 0x80 | b'\'',
-                b'"' => 0x80 | b'"',
-
-                // use NUL to indicate hex-escaped
-                0x00..=0x1F | 0x7F..=0xFF => 0x80 | b'\0',
-
-                _ => idx,
-            };
-            if idx == 255 {
-                break;
-            }
-            idx += 1;
+    #[cfg(feature = "optimize_for_size")]
+    {
+        match byte {
+            b'\t' => backslash(ascii::Char::SmallT),
+            b'\r' => backslash(ascii::Char::SmallR),
+            b'\n' => backslash(ascii::Char::SmallN),
+            b'\\' => backslash(ascii::Char::ReverseSolidus),
+            b'\'' => backslash(ascii::Char::Apostrophe),
+            b'"' => backslash(ascii::Char::QuotationMark),
+            0x00..=0x1F | 0x7F => hex_escape(byte), // DEL is a control byte too
+            _ => match ascii::Char::from_u8(byte) {
+                Some(a) => verbatim(a),
+                None => hex_escape(byte), // bytes above 0x7F are not ASCII
+            },
         }
-        arr
-    };
+    }
 
-    let mut output = [ascii::Char::Null; N];
-    let lookup = LOOKUP[byte as usize];
-
-    // 8th bit indicates escape
-    if lookup & 0x80 != 0 {
-        output[0] = ascii::Char::ReverseSolidus;
-
-        // SAFETY: We explicitly mask out the eighth bit.
-        let lookup = unsafe { ascii::Char::from_u8_unchecked(lookup & 0x7F) };
-
-        // NUL indicates hex-escaped
-        if matches!(lookup, ascii::Char::Null) {
-            let hi = HEX_DIGITS[(byte >> 4) as usize];
-            let lo = HEX_DIGITS[(byte & 0xF) as usize];
-            output[1] = ascii::Char::SmallX;
-            output[2] = hi;
-            output[3] = lo;
-            (output, 0..4)
+    #[cfg(not(feature = "optimize_for_size"))]
+    {
+        /// Lookup table helps us determine how to display character.
+        ///
+        /// Since ASCII characters will always be 7 bits, we can exploit this to store the 8th bit to
+        /// indicate whether the result is escaped or unescaped.
+        ///
+        /// We additionally use 0x80 (escaped NUL character) to indicate hex-escaped bytes, since
+        /// escaped NUL will not occur.
+        const LOOKUP: [u8; 256] = {
+            let mut arr = [0; 256];
+            let mut idx = 0;
+            loop {
+                arr[idx as usize] = match idx {
+                    // use 8th bit to indicate escaped
+                    b'\t' => 0x80 | b't',
+                    b'\r' => 0x80 | b'r',
+                    b'\n' => 0x80 | b'n',
+                    b'\\' => 0x80 | b'\\',
+                    b'\'' => 0x80 | b'\'',
+                    b'"' => 0x80 | b'"',
+
+                    // use NUL to indicate hex-escaped
+                    0x00..=0x1F | 0x7F..=0xFF => 0x80 | b'\0',
+
+                    _ => idx,
+                };
+                if idx == 255 {
+                    break;
+                }
+                idx += 1;
+            }
+            arr
+        };
+
+        let lookup = LOOKUP[byte as usize];
+
+        // 8th bit indicates escape
+        let lookup_escaped = lookup & 0x80 != 0;
+
+        // SAFETY: We explicitly mask out the eighth bit to get a 7-bit ASCII character.
+        let lookup_ascii = unsafe { ascii::Char::from_u8_unchecked(lookup & 0x7F) };
+
+        if lookup_escaped {
+            // NUL indicates hex-escaped
+            if matches!(lookup_ascii, ascii::Char::Null) {
+                hex_escape(byte)
+            } else {
+                backslash(lookup_ascii)
+            }
         } else {
-            output[1] = lookup;
-            (output, 0..2)
+            verbatim(lookup_ascii)
         }
-    } else {
-        // SAFETY: We explicitly checked for the eighth bit.
-        output[0] = unsafe { ascii::Char::from_u8_unchecked(lookup) };
-        (output, 0..1)
     }
 }