Skip to content

Commit e43aa23

Browse files
committed
Introduce -Zsplit-metadata option
This will split the crate metadata out of library files. Instead, only the svh is preserved to allow for loading the right rmeta file. This significantly reduces library size. In addition, it allows for cheaper checks of whether different library files are the same crate.
1 parent 39cb338 commit e43aa23

File tree

8 files changed

+97
-27
lines changed

8 files changed

+97
-27
lines changed

compiler/rustc_codegen_ssa/src/back/link.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,7 @@ fn link_rlib<'a>(
306306
let (metadata, metadata_position) = create_wrapper_file(
307307
sess,
308308
".rmeta".to_string(),
309-
codegen_results.metadata.raw_data(),
309+
codegen_results.metadata.maybe_reference(),
310310
);
311311
let metadata = emit_wrapper_file(sess, &metadata, tmpdir, METADATA_FILENAME);
312312
match metadata_position {

compiler/rustc_codegen_ssa/src/back/metadata.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -578,8 +578,8 @@ pub fn create_compressed_metadata_file(
578578
symbol_name: &str,
579579
) -> Vec<u8> {
580580
let mut packed_metadata = rustc_metadata::METADATA_HEADER.to_vec();
581-
packed_metadata.write_all(&(metadata.raw_data().len() as u64).to_le_bytes()).unwrap();
582-
packed_metadata.extend(metadata.raw_data());
581+
packed_metadata.write_all(&(metadata.maybe_reference().len() as u64).to_le_bytes()).unwrap();
582+
packed_metadata.extend(metadata.maybe_reference());
583583

584584
let Some(mut file) = create_object_file(sess) else {
585585
if sess.target.is_like_wasm {

compiler/rustc_interface/src/tests.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -847,6 +847,7 @@ fn test_unstable_options_tracking_hash() {
847847
tracked!(simulate_remapped_rust_src_base, Some(PathBuf::from("/rustc/abc")));
848848
tracked!(small_data_threshold, Some(16));
849849
tracked!(split_lto_unit, Some(true));
850+
tracked!(split_metadata, true);
850851
tracked!(src_hash_algorithm, Some(SourceFileHashAlgorithm::Sha1));
851852
tracked!(stack_protector, StackProtector::All);
852853
tracked!(teach, true);

compiler/rustc_metadata/src/fs.rs

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,8 @@ pub fn encode_and_write_metadata(tcx: TyCtxt<'_>) -> (EncodedMetadata, bool) {
5050
.tempdir_in(out_filename.parent().unwrap_or_else(|| Path::new("")))
5151
.unwrap_or_else(|err| tcx.dcx().emit_fatal(FailedCreateTempdir { err }));
5252
let metadata_tmpdir = MaybeTempDir::new(metadata_tmpdir, tcx.sess.opts.cg.save_temps);
53-
let metadata_filename = metadata_tmpdir.as_ref().join(METADATA_FILENAME);
53+
let metadata_filename = metadata_tmpdir.as_ref().join("full.rmeta");
54+
let metadata_reference_filename = metadata_tmpdir.as_ref().join("ref.rmeta");
5455

5556
// Always create a file at `metadata_filename`, even if we have nothing to write to it.
5657
// This simplifies the creation of the output `out_filename` when requested.
@@ -60,9 +61,12 @@ pub fn encode_and_write_metadata(tcx: TyCtxt<'_>) -> (EncodedMetadata, bool) {
6061
std::fs::File::create(&metadata_filename).unwrap_or_else(|err| {
6162
tcx.dcx().emit_fatal(FailedCreateFile { filename: &metadata_filename, err });
6263
});
64+
std::fs::File::create(&metadata_reference_filename).unwrap_or_else(|err| {
65+
tcx.dcx().emit_fatal(FailedCreateFile { filename: &metadata_filename, err });
66+
});
6367
}
6468
MetadataKind::Uncompressed | MetadataKind::Compressed => {
65-
encode_metadata(tcx, &metadata_filename);
69+
encode_metadata(tcx, &metadata_filename, &metadata_reference_filename)
6670
}
6771
};
6872

@@ -100,9 +104,10 @@ pub fn encode_and_write_metadata(tcx: TyCtxt<'_>) -> (EncodedMetadata, bool) {
100104

101105
// Load metadata back to memory: codegen may need to include it in object files.
102106
let metadata =
103-
EncodedMetadata::from_path(metadata_filename, metadata_tmpdir).unwrap_or_else(|err| {
104-
tcx.dcx().emit_fatal(FailedCreateEncodedMetadata { err });
105-
});
107+
EncodedMetadata::from_path(metadata_filename, metadata_reference_filename, metadata_tmpdir)
108+
.unwrap_or_else(|err| {
109+
tcx.dcx().emit_fatal(FailedCreateEncodedMetadata { err });
110+
});
106111

107112
let need_metadata_module = metadata_kind == MetadataKind::Compressed;
108113

compiler/rustc_metadata/src/locator.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -581,6 +581,11 @@ impl<'a> CrateLocator<'a> {
581581
) {
582582
Ok(blob) => {
583583
if let Some(h) = self.crate_matches(&blob, &lib) {
584+
if blob.get_header().is_reference {
585+
if slot.is_none() {
586+
todo!("return error");
587+
}
588+
}
584589
(h, blob)
585590
} else {
586591
info!("metadata mismatch");

compiler/rustc_metadata/src/rmeta/encoder.rs

Lines changed: 72 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -701,6 +701,7 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
701701
triple: tcx.sess.opts.target_triple.clone(),
702702
hash: tcx.crate_hash(LOCAL_CRATE),
703703
is_proc_macro_crate: proc_macro_data.is_some(),
704+
is_reference: false,
704705
},
705706
extra_filename: tcx.sess.opts.cg.extra_filename.clone(),
706707
stable_crate_id: tcx.def_path_hash(LOCAL_CRATE.as_def_id()).stable_crate_id(),
@@ -2206,42 +2207,61 @@ fn prefetch_mir(tcx: TyCtxt<'_>) {
22062207
// generated regardless of trailing bytes that end up in it.
22072208

22082209
pub struct EncodedMetadata {
2209-
// The declaration order matters because `mmap` should be dropped before `_temp_dir`.
2210-
mmap: Option<Mmap>,
2210+
// The declaration order matters because `full_mmap` should be dropped
2211+
// before `_temp_dir`.
2212+
full_mmap: Option<Mmap>,
2213+
reference: Option<Vec<u8>>,
22112214
// We need to carry MaybeTempDir to avoid deleting the temporary
22122215
// directory while accessing the Mmap.
22132216
_temp_dir: Option<MaybeTempDir>,
22142217
}
22152218

22162219
impl EncodedMetadata {
22172220
#[inline]
2218-
pub fn from_path(path: PathBuf, temp_dir: Option<MaybeTempDir>) -> std::io::Result<Self> {
2221+
pub fn from_path(
2222+
path: PathBuf,
2223+
reference_path: PathBuf,
2224+
temp_dir: Option<MaybeTempDir>,
2225+
) -> std::io::Result<Self> {
22192226
let file = std::fs::File::open(&path)?;
22202227
let file_metadata = file.metadata()?;
22212228
if file_metadata.len() == 0 {
2222-
return Ok(Self { mmap: None, _temp_dir: None });
2229+
return Ok(Self { full_mmap: None, reference: None, _temp_dir: None });
22232230
}
2224-
let mmap = unsafe { Some(Mmap::map(file)?) };
2225-
Ok(Self { mmap, _temp_dir: temp_dir })
2231+
let full_mmap = unsafe { Some(Mmap::map(file)?) };
2232+
2233+
let reference = std::fs::read(reference_path)?;
2234+
let reference = if reference.is_empty() { None } else { Some(reference) };
2235+
2236+
Ok(Self { full_mmap, reference, _temp_dir: temp_dir })
2237+
}
2238+
2239+
#[inline]
2240+
pub fn full(&self) -> &[u8] {
2241+
&self.full_mmap.as_deref().unwrap_or_default()
22262242
}
22272243

22282244
#[inline]
2229-
pub fn raw_data(&self) -> &[u8] {
2230-
self.mmap.as_deref().unwrap_or_default()
2245+
pub fn maybe_reference(&self) -> &[u8] {
2246+
self.reference.as_deref().unwrap_or(self.full())
22312247
}
22322248
}
22332249

22342250
impl<S: Encoder> Encodable<S> for EncodedMetadata {
22352251
fn encode(&self, s: &mut S) {
2236-
let slice = self.raw_data();
2252+
self.reference.encode(s);
2253+
2254+
let slice = self.full();
22372255
slice.encode(s)
22382256
}
22392257
}
22402258

22412259
impl<D: Decoder> Decodable<D> for EncodedMetadata {
22422260
fn decode(d: &mut D) -> Self {
2261+
let reference = <Option<Vec<u8>>>::decode(d);
2262+
22432263
let len = d.read_usize();
2244-
let mmap = if len > 0 {
2264+
let full_mmap = if len > 0 {
22452265
let mut mmap = MmapMut::map_anon(len).unwrap();
22462266
for _ in 0..len {
22472267
(&mut mmap[..]).write_all(&[d.read_u8()]).unwrap();
@@ -2252,11 +2272,11 @@ impl<D: Decoder> Decodable<D> for EncodedMetadata {
22522272
None
22532273
};
22542274

2255-
Self { mmap, _temp_dir: None }
2275+
Self { full_mmap, reference, _temp_dir: None }
22562276
}
22572277
}
22582278

2259-
pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
2279+
pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path, ref_path: &Path) {
22602280
let _prof_timer = tcx.prof.verbose_generic_activity("generate_crate_metadata");
22612281

22622282
// Since encoding metadata is not in a query, and nothing is cached,
@@ -2270,6 +2290,44 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
22702290
join(|| prefetch_mir(tcx), || tcx.exported_symbols(LOCAL_CRATE));
22712291
}
22722292

2293+
with_encode_metadata_header(tcx, path, |ecx| {
2294+
// Encode all the entries and extra information in the crate,
2295+
// culminating in the `CrateRoot` which points to all of it.
2296+
let root = ecx.encode_crate_root();
2297+
2298+
// Flush buffer to ensure backing file has the correct size.
2299+
ecx.opaque.flush();
2300+
// Record metadata size for self-profiling
2301+
tcx.prof.artifact_size(
2302+
"crate_metadata",
2303+
"crate_metadata",
2304+
ecx.opaque.file().metadata().unwrap().len(),
2305+
);
2306+
2307+
root.position.get()
2308+
});
2309+
2310+
if tcx.sess.opts.unstable_opts.split_metadata
2311+
&& !tcx.crate_types().contains(&CrateType::ProcMacro)
2312+
{
2313+
with_encode_metadata_header(tcx, ref_path, |ecx| {
2314+
let header: LazyValue<CrateHeader> = ecx.lazy(CrateHeader {
2315+
name: tcx.crate_name(LOCAL_CRATE),
2316+
triple: tcx.sess.opts.target_triple.clone(),
2317+
hash: tcx.crate_hash(LOCAL_CRATE),
2318+
is_proc_macro_crate: false,
2319+
is_reference: true,
2320+
});
2321+
header.position.get()
2322+
});
2323+
}
2324+
}
2325+
2326+
fn with_encode_metadata_header(
2327+
tcx: TyCtxt<'_>,
2328+
path: &Path,
2329+
f: impl FnOnce(&mut EncodeContext<'_, '_>) -> usize,
2330+
) {
22732331
let mut encoder = opaque::FileEncoder::new(path)
22742332
.unwrap_or_else(|err| tcx.dcx().emit_fatal(FailCreateFileEncoder { err }));
22752333
encoder.emit_raw_bytes(METADATA_HEADER);
@@ -2304,9 +2362,7 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
23042362
// Encode the rustc version string in a predictable location.
23052363
rustc_version(tcx.sess.cfg_version).encode(&mut ecx);
23062364

2307-
// Encode all the entries and extra information in the crate,
2308-
// culminating in the `CrateRoot` which points to all of it.
2309-
let root = ecx.encode_crate_root();
2365+
let root_position = f(&mut ecx);
23102366

23112367
// Make sure we report any errors from writing to the file.
23122368
// If we forget this, compilation can succeed with an incomplete rmeta file,
@@ -2316,12 +2372,9 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
23162372
}
23172373

23182374
let file = ecx.opaque.file();
2319-
if let Err(err) = encode_root_position(file, root.position.get()) {
2375+
if let Err(err) = encode_root_position(file, root_position) {
23202376
tcx.dcx().emit_fatal(FailWriteFile { path: ecx.opaque.path(), err });
23212377
}
2322-
2323-
// Record metadata size for self-profiling
2324-
tcx.prof.artifact_size("crate_metadata", "crate_metadata", file.metadata().unwrap().len());
23252378
}
23262379

23272380
fn encode_root_position(mut file: &File, pos: usize) -> Result<(), std::io::Error> {

compiler/rustc_metadata/src/rmeta/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,10 @@ pub(crate) struct CrateHeader {
221221
/// This is separate from [`ProcMacroData`] to avoid having to update [`METADATA_VERSION`] every
222222
/// time ProcMacroData changes.
223223
pub(crate) is_proc_macro_crate: bool,
224+
/// Whether this header is a reference to a separate rmeta file.
225+
///
226+
/// This is used inside rlibs and dylibs when using `-Zsplit-metadata`.
227+
pub(crate) is_reference: bool,
224228
}
225229

226230
/// Serialized `.rmeta` data for a crate.

compiler/rustc_session/src/options.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2060,6 +2060,8 @@ written to standard error output)"),
20602060
by the linker"),
20612061
split_lto_unit: Option<bool> = (None, parse_opt_bool, [TRACKED],
20622062
"enable LTO unit splitting (default: no)"),
2063+
split_metadata: bool = (false, parse_bool, [TRACKED],
2064+
"split metadata out of libraries into .rmeta files"),
20632065
src_hash_algorithm: Option<SourceFileHashAlgorithm> = (None, parse_src_file_hash, [TRACKED],
20642066
"hash algorithm of source files in debug info (`md5`, `sha1`, or `sha256`)"),
20652067
#[rustc_lint_opt_deny_field_access("use `Session::stack_protector` instead of this field")]

0 commit comments

Comments
 (0)