diff --git a/uefi-macros/CHANGELOG.md b/uefi-macros/CHANGELOG.md index 9ddd357fa..b56ff7fce 100644 --- a/uefi-macros/CHANGELOG.md +++ b/uefi-macros/CHANGELOG.md @@ -1,8 +1,8 @@ # uefi-macros - [Unreleased] ## Removed -- Removed the `cstr16` macro. Use the `cstr16` declarative macro exported by the - `uefi` crate instead. +- Removed the `cstr8` and `cstr16` macros. Use the declarative macros of the + same names exported by the `uefi` crate as a replacement. # uefi-macros - 0.13.0 (2023-11-12) diff --git a/uefi-macros/src/lib.rs b/uefi-macros/src/lib.rs index 635494304..9f5b67da8 100644 --- a/uefi-macros/src/lib.rs +++ b/uefi-macros/src/lib.rs @@ -9,7 +9,7 @@ use quote::{quote, quote_spanned, TokenStreamExt}; use syn::spanned::Spanned; use syn::{ parse_macro_input, parse_quote, Error, Expr, ExprLit, ExprPath, FnArg, Ident, ItemFn, - ItemStruct, Lit, LitStr, Pat, Visibility, + ItemStruct, Lit, Pat, Visibility, }; macro_rules! err { @@ -247,44 +247,3 @@ pub fn entry(args: TokenStream, input: TokenStream) -> TokenStream { }; result.into() } - -/// Builds a `CStr8` literal at compile time from a string literal. -/// -/// This will throw a compile error if an invalid character is in the passed string. -/// -/// # Example -/// ``` -/// # use uefi_macros::cstr8; -/// // Empty string -/// assert_eq!(cstr8!().to_u16_slice_with_nul(), [0]); -/// assert_eq!(cstr8!("").to_u16_slice_with_nul(), [0]); -/// // Non-empty string -/// assert_eq!(cstr8!("test").as_bytes(), [116, 101, 115, 116, 0]); -/// ``` -#[proc_macro] -pub fn cstr8(input: proc_macro::TokenStream) -> proc_macro::TokenStream { - // Accept empty input. - if input.is_empty() { - return quote!(unsafe { ::uefi::CStr16::from_u16_with_nul_unchecked(&[0]) }).into(); - } - let input: LitStr = parse_macro_input!(input); - let input = input.value(); - // Accept "" input. - if input.is_empty() { - return quote!(unsafe { ::uefi::CStr16::from_u16_with_nul_unchecked(&[0]) }).into(); - } - - // Accept any non-empty string input. 
- match input - .chars() - .map(u8::try_from) - .collect::<Result<Vec<u8>, _>>() - { - Ok(c) => { - quote!(unsafe { ::uefi::CStr8::from_bytes_with_nul_unchecked(&[ #(#c),* , 0 ]) }).into() - } - Err(_) => syn::Error::new_spanned(input, "invalid character in string") - .into_compile_error() - .into(), - } -} diff --git a/uefi/src/data_types/mod.rs b/uefi/src/data_types/mod.rs index d4be9e772..74239c025 100644 --- a/uefi/src/data_types/mod.rs +++ b/uefi/src/data_types/mod.rs @@ -151,6 +151,10 @@ pub use strs::{ CStr16, CStr8, EqStrUntilNul, FromSliceWithNulError, FromStrWithBufError, UnalignedCStr16Error, }; +/// These functions are used in the implementation of the [`cstr8`] macro. +#[doc(hidden)] +pub use strs::{str_num_latin1_chars, str_to_latin1}; + #[cfg(feature = "alloc")] mod owned_strs; #[cfg(feature = "alloc")] diff --git a/uefi/src/data_types/strs.rs b/uefi/src/data_types/strs.rs index 20c23db3c..2855e8802 100644 --- a/uefi/src/data_types/strs.rs +++ b/uefi/src/data_types/strs.rs @@ -221,6 +221,93 @@ impl<'a> TryFrom<&'a CStr> for &'a CStr8 { } } +/// Get a Latin-1 character from a UTF-8 byte slice at the given offset. +/// +/// Returns a pair containing the Latin-1 character and the number of bytes in +/// the UTF-8 encoding of that character. +/// +/// Panics if the string cannot be encoded in Latin-1. +/// +/// # Safety +/// +/// The input `bytes` must be valid UTF-8. +const unsafe fn latin1_from_utf8_at_offset(bytes: &[u8], offset: usize) -> (u8, usize) { + if bytes[offset] & 0b1000_0000 == 0b0000_0000 { + (bytes[offset], 1) + } else if bytes[offset] & 0b1110_0000 == 0b1100_0000 { + let a = (bytes[offset] & 0b0001_1111) as u16; + let b = (bytes[offset + 1] & 0b0011_1111) as u16; + let ch = a << 6 | b; + if ch > 0xff { + panic!("input string cannot be encoded as Latin-1"); + } + (ch as u8, 2) + } else { + // Latin-1 code points only go up to 0xff, so if the input contains any + // UTF-8 characters larger than two bytes it cannot be converted to + // Latin-1. 
+ panic!("input string cannot be encoded as Latin-1"); + } +} + +/// Count the number of Latin-1 characters in a string. +/// +/// Panics if the string cannot be encoded in Latin-1. +/// +/// This is public but hidden; it is used in the `cstr8` macro. +#[must_use] +pub const fn str_num_latin1_chars(s: &str) -> usize { + let bytes = s.as_bytes(); + let len = bytes.len(); + + let mut offset = 0; + let mut num_latin1_chars = 0; + + while offset < len { + // SAFETY: `bytes` is valid UTF-8. + let (_, num_utf8_bytes) = unsafe { latin1_from_utf8_at_offset(bytes, offset) }; + offset += num_utf8_bytes; + num_latin1_chars += 1; + } + + num_latin1_chars +} + +/// Convert a `str` into a null-terminated Latin-1 character array. +/// +/// Panics if the string cannot be encoded in Latin-1. +/// +/// This is public but hidden; it is used in the `cstr8` macro. +#[must_use] +pub const fn str_to_latin1<const N: usize>(s: &str) -> [u8; N] { + let bytes = s.as_bytes(); + let len = bytes.len(); + + let mut output = [0; N]; + + let mut output_offset = 0; + let mut input_offset = 0; + while input_offset < len { + // SAFETY: `bytes` is valid UTF-8. + let (ch, num_utf8_bytes) = unsafe { latin1_from_utf8_at_offset(bytes, input_offset) }; + if ch == 0 { + panic!("interior null character"); + } else { + output[output_offset] = ch; + output_offset += 1; + input_offset += num_utf8_bytes; + } + } + + // The output array must be one bigger than the converted string, + // to leave room for the trailing null character. + if output_offset + 1 != N { + panic!("incorrect array length"); + } + + output +} + /// An UCS-2 null-terminated string slice. 
/// /// This type is largely inspired by [`core::ffi::CStr`] with the exception that all characters are diff --git a/uefi/src/lib.rs b/uefi/src/lib.rs index 651e1b3d7..9a425aac6 100644 --- a/uefi/src/lib.rs +++ b/uefi/src/lib.rs @@ -113,7 +113,7 @@ pub mod data_types; #[cfg(feature = "alloc")] pub use data_types::CString16; pub use data_types::{CStr16, CStr8, Char16, Char8, Event, Guid, Handle, Identify}; -pub use uefi_macros::{cstr8, entry}; +pub use uefi_macros::entry; pub use uguid::guid; mod result; @@ -140,17 +140,3 @@ pub mod helpers; mod macros; mod util; - -#[cfg(test)] -// Crates that create procedural macros can't unit test the macros they export. -// Therefore, we do some tests here. -mod macro_tests { - use crate::cstr8; - - #[test] - fn cstr8_macro_literal() { - let _empty1 = cstr8!(); - let _empty2 = cstr8!(""); - let _regular = cstr8!("foobar"); - } -} diff --git a/uefi/src/macros.rs b/uefi/src/macros.rs index 4e9897157..5bb2f7b79 100644 --- a/uefi/src/macros.rs +++ b/uefi/src/macros.rs @@ -1,3 +1,47 @@ +/// Encode a string literal as a [`&CStr8`]. +/// +/// The encoding is done at compile time, so the result can be used in a +/// `const` item. +/// +/// An empty string containing just a null character can be created with either +/// `cstr8!()` or `cstr8!("")`. +/// +/// # Example +/// +/// ``` +/// use uefi::{CStr8, cstr8}; +/// +/// const S: &CStr8 = cstr8!("abÿ"); +/// assert_eq!(S.as_bytes(), [97, 98, 255, 0]); +/// +/// const EMPTY: &CStr8 = cstr8!(); +/// assert_eq!(EMPTY.as_bytes(), [0]); +/// assert_eq!(cstr8!(""), EMPTY); +/// ``` +/// +/// [`&CStr8`]: crate::CStr8 +#[macro_export] +macro_rules! cstr8 { + () => {{ + const S: &[u8] = &[0]; + // SAFETY: `S` is a trivially correct Latin-1 C string. + unsafe { $crate::CStr8::from_bytes_with_nul_unchecked(S) } + }}; + ($s:literal) => {{ + // Use `const` values here to force errors to happen at compile + // time. + + // Add one for the null char. 
+ const NUM_CHARS: usize = $crate::data_types::str_num_latin1_chars($s) + 1; + + const VAL: [u8; NUM_CHARS] = $crate::data_types::str_to_latin1($s); + + // SAFETY: the `str_to_latin1` function always produces a valid Latin-1 + // string with a trailing null character. + unsafe { $crate::CStr8::from_bytes_with_nul_unchecked(&VAL) } + }}; +} + /// Encode a string literal as a [`&CStr16`]. /// /// The encoding is done at compile time, so the result can be used in a