diff --git a/compiler/rustc_codegen_gcc/src/back/lto.rs b/compiler/rustc_codegen_gcc/src/back/lto.rs index e5221c7da3197..66c4e1e754dff 100644 --- a/compiler/rustc_codegen_gcc/src/back/lto.rs +++ b/compiler/rustc_codegen_gcc/src/back/lto.rs @@ -17,7 +17,7 @@ // /usr/bin/ld: warning: type of symbol `_RNvNvNvNvNtNtNtCsAj5i4SGTR7_3std4sync4mpmc5waker17current_thread_id5DUMMY7___getit5___KEY' changed from 1 to 6 in /tmp/ccKeUSiR.ltrans0.ltrans.o // /usr/bin/ld: warning: incremental linking of LTO and non-LTO objects; using -flinker-output=nolto-rel which will bypass whole program optimization use std::ffi::{CStr, CString}; -use std::fs::{self, File}; +use std::fs; use std::path::{Path, PathBuf}; use std::sync::Arc; @@ -126,9 +126,7 @@ fn prepare_lto( .extend(exported_symbols[&cnum].iter().filter_map(symbol_filter)); } - let archive_data = unsafe { - Mmap::map(File::open(path).expect("couldn't open rlib")).expect("couldn't map rlib") - }; + let archive_data = unsafe { Mmap::map(path).expect("couldn't map rlib") }; let archive = ArchiveFile::parse(&*archive_data).expect("wanted an rlib"); let obj_files = archive .members() diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs index 668795191a29a..594ebdb730751 100644 --- a/compiler/rustc_codegen_llvm/src/back/lto.rs +++ b/compiler/rustc_codegen_llvm/src/back/lto.rs @@ -113,10 +113,7 @@ fn prepare_lto( .extend(exported_symbols[&cnum].iter().filter_map(symbol_filter)); } - let archive_data = unsafe { - Mmap::map(std::fs::File::open(&path).expect("couldn't open rlib")) - .expect("couldn't map rlib") - }; + let archive_data = Mmap::map(&path).expect("couldn't map rlib"); let archive = ArchiveFile::parse(&*archive_data).expect("wanted an rlib"); let obj_files = archive .members() diff --git a/compiler/rustc_codegen_ssa/src/back/archive.rs b/compiler/rustc_codegen_ssa/src/back/archive.rs index 60af462b6b619..0fea7ef28c196 100644 --- a/compiler/rustc_codegen_ssa/src/back/archive.rs +++ b/compiler/rustc_codegen_ssa/src/back/archive.rs @@ -137,13 +137,8 @@ pub trait ArchiveBuilderBuilder { outdir: &Path, bundled_lib_file_names: &FxIndexSet, ) -> Result<(), ExtractBundledLibsError<'a>> { - let archive_map = unsafe { - Mmap::map( - File::open(rlib) - .map_err(|e| ExtractBundledLibsError::OpenFile { rlib, error: Box::new(e) })?, - ) - .map_err(|e| ExtractBundledLibsError::MmapFile { rlib, error: Box::new(e) })? - }; + let archive_map = Mmap::map(rlib) + .map_err(|e| ExtractBundledLibsError::MmapFile { rlib, error: Box::new(e) })?; let archive = ArchiveFile::parse(&*archive_map) .map_err(|e| ExtractBundledLibsError::ParseArchive { rlib, error: Box::new(e) })?; @@ -363,7 +358,7 @@ pub fn try_extract_macho_fat_archive( sess: &Session, archive_path: &Path, ) -> io::Result> { - let archive_map = unsafe { Mmap::map(File::open(&archive_path)?)? }; + let archive_map = Mmap::map(&archive_path)?; let target_arch = match sess.target.arch.as_ref() { "aarch64" => object::Architecture::Aarch64, "x86_64" => object::Architecture::X86_64, @@ -400,7 +395,7 @@ impl<'a> ArchiveBuilder for ArArchiveBuilder<'a> { return Ok(()); } - let archive_map = unsafe { Mmap::map(File::open(&archive_path)?)? }; + let archive_map = Mmap::map(&archive_path)?; let archive = ArchiveFile::parse(&*archive_map) .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))?; let archive_index = self.src_archives.len(); @@ -463,25 +458,20 @@ impl<'a> ArArchiveBuilder<'a> { let mut entries = Vec::new(); for (entry_name, entry) in self.entries { - let data = - match entry { - ArchiveEntry::FromArchive { archive_index, file_range } => { - let src_archive = &self.src_archives[archive_index]; - - let data = &src_archive.1 - [file_range.0 as usize..file_range.0 as usize + file_range.1 as usize]; - - Box::new(data) as Box> - } - ArchiveEntry::File(file) => unsafe { - Box::new( - Mmap::map(File::open(file).map_err(|err| { - io_error_context("failed to open object file", err) - })?) - .map_err(|err| io_error_context("failed to map object file", err))?, - ) as Box> - }, - }; + let data = match entry { + ArchiveEntry::FromArchive { archive_index, file_range } => { + let src_archive = &self.src_archives[archive_index]; + + let data = &src_archive.1 + [file_range.0 as usize..file_range.0 as usize + file_range.1 as usize]; + + Box::new(data) as Box> + } + ArchiveEntry::File(file) => Box::new( + Mmap::map(file) + .map_err(|err| io_error_context("failed to map object file", err))?, + ) as Box>, + }; entries.push(NewArchiveMember { buf: data, diff --git a/compiler/rustc_codegen_ssa/src/back/link.rs b/compiler/rustc_codegen_ssa/src/back/link.rs index 7d9971c021db3..51a629bed683e 100644 --- a/compiler/rustc_codegen_ssa/src/back/link.rs +++ b/compiler/rustc_codegen_ssa/src/back/link.rs @@ -2,7 +2,7 @@ mod raw_dylib; use std::collections::BTreeSet; use std::ffi::OsString; -use std::fs::{File, OpenOptions, read}; +use std::fs::{OpenOptions, read}; use std::io::{BufWriter, Write}; use std::ops::{ControlFlow, Deref}; use std::path::{Path, PathBuf}; @@ -578,8 +578,7 @@ fn link_dwarf_object(sess: &Session, cg_results: &CodegenResults, executable_out } fn read_input(&self, path: &Path) -> std::io::Result<&[u8]> { - let file = File::open(&path)?; - let mmap = (unsafe { Mmap::map(file) })?; + let mmap = Mmap::map(&path)?; Ok(self.alloc_mmap(mmap)) } } diff --git a/compiler/rustc_codegen_ssa/src/back/metadata.rs b/compiler/rustc_codegen_ssa/src/back/metadata.rs index 68b453ff42425..e273ac5b6f7dd 100644 --- a/compiler/rustc_codegen_ssa/src/back/metadata.rs +++ b/compiler/rustc_codegen_ssa/src/back/metadata.rs @@ -1,7 +1,6 @@ //! Reading of the rustc metadata for rlibs and dylibs use std::borrow::Cow; -use std::fs::File; use std::io::Write; use std::path::Path; @@ -44,10 +43,7 @@ fn load_metadata_with( path: &Path, f: impl for<'a> FnOnce(&'a [u8]) -> Result<&'a [u8], String>, ) -> Result { - let file = - File::open(path).map_err(|e| format!("failed to open file '{}': {}", path.display(), e))?; - - unsafe { Mmap::map(file) } + Mmap::map(&path) .map_err(|e| format!("failed to mmap file '{}': {}", path.display(), e)) .and_then(|mmap| try_slice_owned(mmap, |mmap| f(mmap))) } diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs index 87992ce2e11b4..ac1c8dae515d5 100644 --- a/compiler/rustc_codegen_ssa/src/back/write.rs +++ b/compiler/rustc_codegen_ssa/src/back/write.rs @@ -2151,14 +2151,9 @@ pub(crate) fn submit_pre_lto_module_to_llvm( ) { let filename = pre_lto_bitcode_filename(&module.name); let bc_path = in_incr_comp_dir_sess(tcx.sess, &filename); - let file = fs::File::open(&bc_path) - .unwrap_or_else(|e| panic!("failed to open bitcode file `{}`: {}", bc_path.display(), e)); - let mmap = unsafe { - Mmap::map(file).unwrap_or_else(|e| { - panic!("failed to mmap bitcode file `{}`: {}", bc_path.display(), e) - }) - }; + let mmap = Mmap::map(&bc_path) + .unwrap_or_else(|e| panic!("failed to mmap bitcode file `{}`: {}", bc_path.display(), e)); // Schedule the module to be loaded drop(tx_to_llvm_workers.send(Box::new(Message::AddImportOnlyModule:: { module_data: SerializedModule::FromUncompressedFile(mmap), diff --git a/compiler/rustc_codegen_ssa/src/lib.rs b/compiler/rustc_codegen_ssa/src/lib.rs index 8ad040324147f..6c4233ce0f5d1 100644 --- a/compiler/rustc_codegen_ssa/src/lib.rs +++ b/compiler/rustc_codegen_ssa/src/lib.rs @@ -297,16 +297,14 @@ impl CodegenResults { ) -> Result<(Self, OutputFilenames), CodegenErrors> { // The Decodable machinery is not used here because it panics if the input data is invalid // and because its internal representation may change. - if !data.starts_with(RLINK_MAGIC) { + let Some(data) = data.strip_prefix(RLINK_MAGIC) else { return Err(CodegenErrors::WrongFileType); - } - let data = &data[RLINK_MAGIC.len()..]; - if data.len() < 4 { + }; + + let Some((&version_array, data)) = data.split_first_chunk() else { return Err(CodegenErrors::EmptyVersionNumber); - } + }; - let mut version_array: [u8; 4] = Default::default(); - version_array.copy_from_slice(&data[..4]); if u32::from_be_bytes(version_array) != RLINK_VERSION { return Err(CodegenErrors::EncodingVersionMismatch { version_array: String::from_utf8_lossy(&version_array).to_string(), @@ -314,7 +312,7 @@ impl CodegenResults { }); } - let Ok(mut decoder) = MemDecoder::new(&data[4..], 0) else { + let Ok(mut decoder) = MemDecoder::new(data, 0) else { return Err(CodegenErrors::CorruptFile); }; let rustc_version = decoder.read_str(); diff --git a/compiler/rustc_data_structures/src/memmap.rs b/compiler/rustc_data_structures/src/memmap.rs index d64a5862f4e76..9ff5ac6f9d691 100644 --- a/compiler/rustc_data_structures/src/memmap.rs +++ b/compiler/rustc_data_structures/src/memmap.rs @@ -1,6 +1,7 @@ use std::fs::File; use std::io; use std::ops::{Deref, DerefMut}; +use std::path::Path; /// A trivial wrapper for [`memmap2::Mmap`] (or `Vec` on WASM). #[cfg(not(any(miri, target_arch = "wasm32")))] @@ -11,13 +12,32 @@ pub struct Mmap(Vec); #[cfg(not(any(miri, target_arch = "wasm32")))] impl Mmap { - /// # Safety - /// /// The given file must not be mutated (i.e., not written, not truncated, ...) until the mapping is closed. /// - /// However in practice most callers do not ensure this, so uses of this function are likely unsound. + /// This process must not modify nor remove the backing file while the memory map lives. + /// For the dep-graph and the work product index, it is as soon as the decoding is done. + /// For the query result cache, the memory map is dropped in save_dep_graph before calling + /// save_in and trying to remove the backing file. + /// + /// There is no way to prevent another process from modifying this file. + /// + /// This means in practice all uses of this function are theoretically unsound, but also + /// the way rustc uses `Mmap` (reading bytes, validating them afterwards *anyway* to detect + /// corrupted files) avoids the actual issues this could cause. + /// + /// Someone may truncate our file, but then we'll SIGBUS, which is not great, but at least + /// we won't succeed with corrupted data. + /// + /// To get a bit more hardening out of this we will set the file as readonly before opening it. #[inline] - pub unsafe fn map(file: File) -> io::Result { + pub fn map(path: impl AsRef) -> io::Result { + let path = path.as_ref(); + let mut perms = std::fs::metadata(path)?.permissions(); + perms.set_readonly(true); + std::fs::set_permissions(path, perms)?; + + let file = File::open(path)?; + // By default, memmap2 creates shared mappings, implying that we could see updates to the // file through the mapping. That would violate our precondition; so by requesting a // map_copy_read_only we do not lose anything. @@ -25,19 +45,15 @@ impl Mmap { // For more details see https://github.com/rust-lang/rust/issues/122262 // // SAFETY: The caller must ensure that this is safe. - unsafe { memmap2::MmapOptions::new().map_copy_read_only(&file).map(Mmap) } + unsafe { Ok(Self(memmap2::MmapOptions::new().map_copy_read_only(&file)?)) } } } #[cfg(any(miri, target_arch = "wasm32"))] impl Mmap { #[inline] - pub unsafe fn map(mut file: File) -> io::Result { - use std::io::Read; - - let mut data = Vec::new(); - file.read_to_end(&mut data)?; - Ok(Mmap(data)) + pub fn map(path: impl AsRef) -> io::Result { + Ok(Mmap(std::fs::read(path)?)) } } diff --git a/compiler/rustc_incremental/src/persist/file_format.rs b/compiler/rustc_incremental/src/persist/file_format.rs index 9cec2702a4171..94f845c36148b 100644 --- a/compiler/rustc_incremental/src/persist/file_format.rs +++ b/compiler/rustc_incremental/src/persist/file_format.rs @@ -95,18 +95,11 @@ pub(crate) fn read_file( is_nightly_build: bool, cfg_version: &'static str, ) -> io::Result> { - let file = match fs::File::open(path) { + let mmap = match Mmap::map(path) { Ok(file) => file, Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(None), Err(err) => return Err(err), }; - // SAFETY: This process must not modify nor remove the backing file while the memory map lives. - // For the dep-graph and the work product index, it is as soon as the decoding is done. - // For the query result cache, the memory map is dropped in save_dep_graph before calling - // save_in and trying to remove the backing file. - // - // There is no way to prevent another process from modifying this file. - let mmap = unsafe { Mmap::map(file) }?; let mut file = io::Cursor::new(&*mmap); diff --git a/compiler/rustc_metadata/src/locator.rs b/compiler/rustc_metadata/src/locator.rs index d5dd5059aacc6..14977bc622ac5 100644 --- a/compiler/rustc_metadata/src/locator.rs +++ b/compiler/rustc_metadata/src/locator.rs @@ -822,13 +822,7 @@ fn get_metadata_section<'p>( } CrateFlavor::Rmeta => { // mmap the file, because only a small fraction of it is read. - let file = std::fs::File::open(filename).map_err(|_| { - MetadataError::LoadFailure(format!( - "failed to open rmeta metadata: '{}'", - filename.display() - )) - })?; - let mmap = unsafe { Mmap::map(file) }; + let mmap = unsafe { Mmap::map(filename) }; let mmap = mmap.map_err(|_| { MetadataError::LoadFailure(format!( "failed to mmap rmeta metadata: '{}'", diff --git a/compiler/rustc_metadata/src/rmeta/encoder.rs b/compiler/rustc_metadata/src/rmeta/encoder.rs index 88a88847e6b8b..11f4c16938b0c 100644 --- a/compiler/rustc_metadata/src/rmeta/encoder.rs +++ b/compiler/rustc_metadata/src/rmeta/encoder.rs @@ -2245,12 +2245,10 @@ pub struct EncodedMetadata { impl EncodedMetadata { #[inline] pub fn from_path(path: PathBuf, temp_dir: Option) -> std::io::Result { - let file = std::fs::File::open(&path)?; - let file_metadata = file.metadata()?; - if file_metadata.len() == 0 { + if std::fs::metadata(&path)?.len() == 0 { return Ok(Self { mmap: None, _temp_dir: None }); } - let mmap = unsafe { Some(Mmap::map(file)?) }; + let mmap = unsafe { Some(Mmap::map(path)?) }; Ok(Self { mmap, _temp_dir: temp_dir }) }