diff --git a/src/libsyntax/ext/expand.rs b/src/libsyntax/ext/expand.rs index 402b42dfbc80d..e978976438367 100644 --- a/src/libsyntax/ext/expand.rs +++ b/src/libsyntax/ext/expand.rs @@ -25,7 +25,6 @@ use syntax_pos::{Span, DUMMY_SP, FileName}; use rustc_data_structures::fx::FxHashMap; use rustc_data_structures::sync::Lrc; -use std::fs; use std::io::ErrorKind; use std::{iter, mem}; use std::ops::DerefMut; @@ -1239,13 +1238,11 @@ impl<'a, 'b> MutVisitor for InvocationCollector<'a, 'b> { } let filename = self.cx.resolve_path(&*file.as_str(), it.span()); - match fs::read_to_string(&filename) { - Ok(src) => { - let src_interned = Symbol::intern(&src); - - // Add this input file to the code map to make it available as - // dependency information - self.cx.source_map().new_source_file(filename.into(), src); + match self.cx.source_map().load_file(&filename) { + Ok(source_file) => { + let src = source_file.src.as_ref() + .expect("freshly loaded file should have a source"); + let src_interned = Symbol::intern(src.as_str()); let include_info = vec![ ast::NestedMetaItem::MetaItem( diff --git a/src/libsyntax/source_map.rs b/src/libsyntax/source_map.rs index 74cab00d3c1eb..ceaa5ee3aa54b 100644 --- a/src/libsyntax/source_map.rs +++ b/src/libsyntax/source_map.rs @@ -170,6 +170,26 @@ impl SourceMap { Ok(self.new_source_file(filename, src)) } + /// Loads source file as a binary blob. + /// + /// Unlike `load_file`, guarantees that no normalization like BOM-removal + /// takes place. + pub fn load_binary_file(&self, path: &Path) -> io::Result> { + // Ideally, this should use `self.file_loader`, but it can't + // deal with binary files yet. + let bytes = fs::read(path)?; + + // We need to add file to the `SourceMap`, so that it is present + // in dep-info. There's also an edge case that file might be both + // loaded as a binary via `include_bytes!` and as proper `SourceFile` + // via `mod`, so we try to use real file contents and not just an + // empty string. + let text = std::str::from_utf8(&bytes).unwrap_or("") + .to_string(); + self.new_source_file(path.to_owned().into(), text); + Ok(bytes) + } + pub fn files(&self) -> MappedLockGuard<'_, Vec>> { LockGuard::map(self.files.borrow(), |files| &mut files.source_files) } diff --git a/src/libsyntax_ext/source_util.rs b/src/libsyntax_ext/source_util.rs index cbc01b48afd03..e008ed710e4d0 100644 --- a/src/libsyntax_ext/source_util.rs +++ b/src/libsyntax_ext/source_util.rs @@ -9,8 +9,6 @@ use syntax::tokenstream; use smallvec::SmallVec; use syntax_pos::{self, Pos, Span}; -use std::fs; -use std::io::ErrorKind; use rustc_data_structures::sync::Lrc; // These macros all relate to the file system; they either return @@ -114,20 +112,17 @@ pub fn expand_include_str(cx: &mut ExtCtxt<'_>, sp: Span, tts: &[tokenstream::To None => return DummyResult::any(sp) }; let file = cx.resolve_path(file, sp); - match fs::read_to_string(&file) { - Ok(src) => { - let interned_src = Symbol::intern(&src); - - // Add this input file to the code map to make it available as - // dependency information - cx.source_map().new_source_file(file.into(), src); - - base::MacEager::expr(cx.expr_str(sp, interned_src)) + match cx.source_map().load_binary_file(&file) { + Ok(bytes) => match std::str::from_utf8(&bytes) { + Ok(src) => { + let interned_src = Symbol::intern(&src); + base::MacEager::expr(cx.expr_str(sp, interned_src)) + } + Err(_) => { + cx.span_err(sp, &format!("{} wasn't a utf-8 file", file.display())); + DummyResult::any(sp) + } }, - Err(ref e) if e.kind() == ErrorKind::InvalidData => { - cx.span_err(sp, &format!("{} wasn't a utf-8 file", file.display())); - DummyResult::any(sp) - } Err(e) => { cx.span_err(sp, &format!("couldn't read {}: {}", file.display(), e)); DummyResult::any(sp) @@ -142,18 +137,8 @@ pub fn expand_include_bytes(cx: &mut ExtCtxt<'_>, sp: Span, tts: &[tokenstream:: None => return DummyResult::any(sp) }; let file = cx.resolve_path(file, sp); - match fs::read(&file) { + match cx.source_map().load_binary_file(&file) { Ok(bytes) => { - // Add the contents to the source map if it contains UTF-8. - let (contents, bytes) = match String::from_utf8(bytes) { - Ok(s) => { - let bytes = s.as_bytes().to_owned(); - (s, bytes) - }, - Err(e) => (String::new(), e.into_bytes()), - }; - cx.source_map().new_source_file(file.into(), contents); - base::MacEager::expr(cx.expr_lit(sp, ast::LitKind::ByteStr(Lrc::new(bytes)))) }, Err(e) => { diff --git a/src/test/ui/.gitattributes b/src/test/ui/.gitattributes index b62ade73aa993..489dc8ad1118c 100644 --- a/src/test/ui/.gitattributes +++ b/src/test/ui/.gitattributes @@ -1,2 +1,3 @@ lexer-crlf-line-endings-string-literal-doc-comment.rs -text trailing-carriage-return-in-string.rs -text +*.bin -text diff --git a/src/test/ui/include-macros/data.bin b/src/test/ui/include-macros/data.bin new file mode 100644 index 0000000000000..ce4e0b8311a31 --- /dev/null +++ b/src/test/ui/include-macros/data.bin @@ -0,0 +1,2 @@ +This file starts with BOM. +Lines are separated by \r\n. diff --git a/src/test/ui/include-macros/normalization.rs b/src/test/ui/include-macros/normalization.rs new file mode 100644 index 0000000000000..889f08e606ec9 --- /dev/null +++ b/src/test/ui/include-macros/normalization.rs @@ -0,0 +1,12 @@ +// run-pass + +fn main() { + assert_eq!( + &include_bytes!("data.bin")[..], + &b"\xEF\xBB\xBFThis file starts with BOM.\r\nLines are separated by \\r\\n.\r\n"[..], + ); + assert_eq!( + include_str!("data.bin"), + "\u{FEFF}This file starts with BOM.\r\nLines are separated by \\r\\n.\r\n", + ); +}