Skip to content

Replace cstr8! with a declarative macro #1151

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions uefi-macros/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# uefi-macros - [Unreleased]

## Removed
- Removed the `cstr16` macro. Use the `cstr16` declarative macro exported by the
`uefi` crate instead.
- Removed the `cstr8` and `cstr16` macros. Use the declarative macros of the
same names exported by the `uefi` crate as a replacement.

# uefi-macros - 0.13.0 (2023-11-12)

Expand Down
43 changes: 1 addition & 42 deletions uefi-macros/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use quote::{quote, quote_spanned, TokenStreamExt};
use syn::spanned::Spanned;
use syn::{
parse_macro_input, parse_quote, Error, Expr, ExprLit, ExprPath, FnArg, Ident, ItemFn,
ItemStruct, Lit, LitStr, Pat, Visibility,
ItemStruct, Lit, Pat, Visibility,
};

macro_rules! err {
Expand Down Expand Up @@ -247,44 +247,3 @@ pub fn entry(args: TokenStream, input: TokenStream) -> TokenStream {
};
result.into()
}

/// Builds a `CStr8` literal at compile time from a string literal.
///
/// This will throw a compile error if an invalid character is in the passed string.
///
/// # Example
/// ```
/// # use uefi_macros::cstr8;
/// // Empty string
/// assert_eq!(cstr8!().to_u16_slice_with_nul(), [0]);
/// assert_eq!(cstr8!("").to_u16_slice_with_nul(), [0]);
/// // Non-empty string
/// assert_eq!(cstr8!("test").as_bytes(), [116, 101, 115, 116, 0]);
/// ```
#[proc_macro]
pub fn cstr8(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
// Accept empty input.
if input.is_empty() {
return quote!(unsafe { ::uefi::CStr16::from_u16_with_nul_unchecked(&[0]) }).into();
}
let input: LitStr = parse_macro_input!(input);
let input = input.value();
// Accept "" input.
if input.is_empty() {
return quote!(unsafe { ::uefi::CStr16::from_u16_with_nul_unchecked(&[0]) }).into();
}

// Accept any non-empty string input.
match input
.chars()
.map(u8::try_from)
.collect::<Result<Vec<u8>, _>>()
{
Ok(c) => {
quote!(unsafe { ::uefi::CStr8::from_bytes_with_nul_unchecked(&[ #(#c),* , 0 ]) }).into()
}
Err(_) => syn::Error::new_spanned(input, "invalid character in string")
.into_compile_error()
.into(),
}
}
4 changes: 4 additions & 0 deletions uefi/src/data_types/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,10 @@ pub use strs::{
CStr16, CStr8, EqStrUntilNul, FromSliceWithNulError, FromStrWithBufError, UnalignedCStr16Error,
};

/// These functions are used in the implementation of the [`cstr8`] macro.
#[doc(hidden)]
pub use strs::{str_num_latin1_chars, str_to_latin1};

#[cfg(feature = "alloc")]
mod owned_strs;
#[cfg(feature = "alloc")]
Expand Down
87 changes: 87 additions & 0 deletions uefi/src/data_types/strs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,93 @@ impl<'a> TryFrom<&'a CStr> for &'a CStr8 {
}
}

/// Decode the UTF-8 sequence starting at `offset` into one Latin-1 character.
///
/// The result is `(character, width)`, where `width` is how many bytes of
/// `bytes` the UTF-8 encoding of that character occupies (1 or 2).
///
/// # Panics
///
/// Panics if the character at `offset` has no Latin-1 encoding, i.e. if its
/// code point is above `0xff`.
///
/// # Safety
///
/// The input `bytes` must be valid UTF-8.
const unsafe fn latin1_from_utf8_at_offset(bytes: &[u8], offset: usize) -> (u8, usize) {
    let lead = bytes[offset];

    // One-byte sequence: the high bit is clear and the byte is the
    // character itself.
    if lead < 0x80 {
        return (lead, 1);
    }

    // Only a `110x_xxxx` lead byte (a two-byte sequence) can encode a code
    // point small enough for Latin-1. Longer sequences always encode code
    // points above 0xff.
    if lead >> 5 != 0b110 {
        panic!("input string cannot be encoded as Latin-1");
    }

    // Combine the five payload bits of the lead byte with the six payload
    // bits of the continuation byte.
    let high = (lead & 0b0001_1111) as u16;
    let low = (bytes[offset + 1] & 0b0011_1111) as u16;
    let code_point = (high << 6) | low;

    // Two-byte sequences reach up to 0x7ff, so the range still has to be
    // checked before narrowing to `u8`.
    if code_point > 0xff {
        panic!("input string cannot be encoded as Latin-1");
    }

    (code_point as u8, 2)
}

/// Return how many Latin-1 characters are needed to represent `s`.
///
/// The count does not include any trailing null character.
///
/// This is public but hidden; it is used in the `cstr8` macro.
///
/// # Panics
///
/// Panics if the string cannot be encoded in Latin-1.
#[must_use]
pub const fn str_num_latin1_chars(s: &str) -> usize {
    let utf8 = s.as_bytes();

    let mut cursor = 0;
    let mut count: usize = 0;

    loop {
        if cursor >= utf8.len() {
            break count;
        }

        // Only the encoded width is needed here; the decoded character is
        // discarded.
        // SAFETY: `utf8` comes from a `&str`, so it is valid UTF-8.
        let (_, width) = unsafe { latin1_from_utf8_at_offset(utf8, cursor) };
        cursor += width;
        count += 1;
    }
}

/// Convert a `str` into a null-terminated Latin-1 character array.
///
/// `N` must be exactly one more than the number of Latin-1 characters in
/// `s`, so that the final element of the returned array is the trailing null
/// character.
///
/// This is public but hidden; it is used in the `cstr8` macro.
///
/// # Panics
///
/// Panics if:
/// * the string cannot be encoded in Latin-1,
/// * the string contains an interior null character, or
/// * `N` is not the converted length plus one.
#[must_use]
pub const fn str_to_latin1<const N: usize>(s: &str) -> [u8; N] {
    let bytes = s.as_bytes();
    let len = bytes.len();

    // Zero-initialized, so the trailing null is already in place.
    let mut output = [0; N];

    let mut output_offset = 0;
    let mut input_offset = 0;
    while input_offset < len {
        // SAFETY: `bytes` is valid UTF-8.
        let (ch, num_utf8_bytes) = unsafe { latin1_from_utf8_at_offset(bytes, input_offset) };
        if ch == 0 {
            panic!("interior null character");
        }

        // An array that is too small would otherwise fail on the write below
        // with an index-out-of-bounds panic; report the real problem instead.
        if output_offset >= N {
            panic!("incorrect array length");
        }

        output[output_offset] = ch;
        output_offset += 1;
        input_offset += num_utf8_bytes;
    }

    // The output array must be one bigger than the converted string,
    // to leave room for the trailing null character.
    if output_offset + 1 != N {
        panic!("incorrect array length");
    }

    output
}

/// An UCS-2 null-terminated string slice.
///
/// This type is largely inspired by [`core::ffi::CStr`] with the exception that all characters are
Expand Down
16 changes: 1 addition & 15 deletions uefi/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ pub mod data_types;
#[cfg(feature = "alloc")]
pub use data_types::CString16;
pub use data_types::{CStr16, CStr8, Char16, Char8, Event, Guid, Handle, Identify};
pub use uefi_macros::{cstr8, entry};
pub use uefi_macros::entry;
pub use uguid::guid;

mod result;
Expand All @@ -140,17 +140,3 @@ pub mod helpers;

mod macros;
mod util;

#[cfg(test)]
// A crate that defines procedural macros cannot unit test the macros it
// exports, so they are exercised from this crate instead.
mod macro_tests {
    use crate::cstr8;

    /// Checks that `cstr8!` expands successfully with no argument, with an
    /// empty literal, and with a non-empty literal.
    #[test]
    fn cstr8_macro_literal() {
        let _no_argument = cstr8!();
        let _empty_literal = cstr8!("");
        let _non_empty = cstr8!("foobar");
    }
}
44 changes: 44 additions & 0 deletions uefi/src/macros.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,47 @@
/// Build a [`&CStr8`] from a string literal.
///
/// The conversion to Latin-1 runs at compile time, which means the macro can
/// be used to initialize a `const` item.
///
/// Both `cstr8!()` and `cstr8!("")` produce an empty string, i.e. one that
/// holds nothing but the null character.
///
/// # Example
///
/// ```
/// use uefi::{CStr8, cstr8};
///
/// const S: &CStr8 = cstr8!("abÿ");
/// assert_eq!(S.as_bytes(), [97, 98, 255, 0]);
///
/// const EMPTY: &CStr8 = cstr8!();
/// assert_eq!(EMPTY.as_bytes(), [0]);
/// assert_eq!(cstr8!(""), EMPTY);
/// ```
///
/// [`&CStr8`]: crate::CStr8
#[macro_export]
macro_rules! cstr8 {
    // No argument: the empty string.
    () => {{
        const NUL_ONLY: &[u8] = &[0];
        // SAFETY: a single null byte is a trivially correct Latin-1 C string.
        unsafe { $crate::CStr8::from_bytes_with_nul_unchecked(NUL_ONLY) }
    }};
    // A string literal, converted to Latin-1.
    ($s:literal) => {{
        // Both values are `const` items so that a conversion failure is
        // reported at compile time.

        // Length of the converted string, plus one for the null char.
        const LEN_WITH_NUL: usize = $crate::data_types::str_num_latin1_chars($s) + 1;

        const LATIN1: [u8; LEN_WITH_NUL] = $crate::data_types::str_to_latin1($s);

        // SAFETY: the `str_to_latin1` function always produces a valid
        // Latin-1 string with a trailing null character.
        unsafe { $crate::CStr8::from_bytes_with_nul_unchecked(&LATIN1) }
    }};
}

/// Encode a string literal as a [`&CStr16`].
///
/// The encoding is done at compile time, so the result can be used in a
Expand Down