mirror of https://github.com/rust-lang/rust.git
Auto merge of #125928 - michaelwoerister:fix-cgu-hashstable, r=oli-obk
Stabilize order of MonoItems in CGUs and disallow query_instability lint for rustc_monomorphize The HashStable impl for `CodegenUnit` was incorrect as described in [MCP 533](https://github.com/rust-lang/compiler-team/issues/533). This PR removes any indeterminism from the way codegen units are built. The changes are pretty straightforward. Part of https://github.com/rust-lang/rust/issues/84447 and [MCP 533](https://github.com/rust-lang/compiler-team/issues/533).
This commit is contained in:
commit
b74702fbb2
|
@ -5,9 +5,9 @@ use rustc_data_structures::base_n::BaseNString;
|
|||
use rustc_data_structures::base_n::ToBaseN;
|
||||
use rustc_data_structures::base_n::CASE_INSENSITIVE;
|
||||
use rustc_data_structures::fingerprint::Fingerprint;
|
||||
use rustc_data_structures::fx::FxHashMap;
|
||||
use rustc_data_structures::fx::FxIndexMap;
|
||||
use rustc_data_structures::stable_hasher::{Hash128, HashStable, StableHasher};
|
||||
use rustc_data_structures::stable_hasher::{Hash128, HashStable, StableHasher, ToStableHashKey};
|
||||
use rustc_data_structures::unord::UnordMap;
|
||||
use rustc_hir::def_id::{CrateNum, DefId, LOCAL_CRATE};
|
||||
use rustc_hir::ItemId;
|
||||
use rustc_index::Idx;
|
||||
|
@ -241,7 +241,17 @@ impl<'tcx> fmt::Display for MonoItem<'tcx> {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
impl ToStableHashKey<StableHashingContext<'_>> for MonoItem<'_> {
|
||||
type KeyType = Fingerprint;
|
||||
|
||||
fn to_stable_hash_key(&self, hcx: &StableHashingContext<'_>) -> Self::KeyType {
|
||||
let mut hasher = StableHasher::new();
|
||||
self.hash_stable(&mut hcx.clone(), &mut hasher);
|
||||
hasher.finish()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, HashStable)]
|
||||
pub struct CodegenUnit<'tcx> {
|
||||
/// A name for this CGU. Incremental compilation requires that
|
||||
/// name be unique amongst **all** crates. Therefore, it should
|
||||
|
@ -430,38 +440,19 @@ impl<'tcx> CodegenUnit<'tcx> {
|
|||
}
|
||||
}
|
||||
|
||||
impl<'a, 'tcx> HashStable<StableHashingContext<'a>> for CodegenUnit<'tcx> {
|
||||
fn hash_stable(&self, hcx: &mut StableHashingContext<'a>, hasher: &mut StableHasher) {
|
||||
let CodegenUnit {
|
||||
ref items,
|
||||
name,
|
||||
// The size estimate is not relevant to the hash
|
||||
size_estimate: _,
|
||||
primary: _,
|
||||
is_code_coverage_dead_code_cgu,
|
||||
} = *self;
|
||||
impl ToStableHashKey<StableHashingContext<'_>> for CodegenUnit<'_> {
|
||||
type KeyType = String;
|
||||
|
||||
name.hash_stable(hcx, hasher);
|
||||
is_code_coverage_dead_code_cgu.hash_stable(hcx, hasher);
|
||||
|
||||
let mut items: Vec<(Fingerprint, _)> = items
|
||||
.iter()
|
||||
.map(|(mono_item, &attrs)| {
|
||||
let mut hasher = StableHasher::new();
|
||||
mono_item.hash_stable(hcx, &mut hasher);
|
||||
let mono_item_fingerprint = hasher.finish();
|
||||
(mono_item_fingerprint, attrs)
|
||||
})
|
||||
.collect();
|
||||
|
||||
items.sort_unstable_by_key(|i| i.0);
|
||||
items.hash_stable(hcx, hasher);
|
||||
fn to_stable_hash_key(&self, _: &StableHashingContext<'_>) -> Self::KeyType {
|
||||
// Codegen unit names are conceptually required to be stable across
|
||||
// compilation session so that object file names match up.
|
||||
self.name.to_string()
|
||||
}
|
||||
}
|
||||
|
||||
pub struct CodegenUnitNameBuilder<'tcx> {
|
||||
tcx: TyCtxt<'tcx>,
|
||||
cache: FxHashMap<CrateNum, String>,
|
||||
cache: UnordMap<CrateNum, String>,
|
||||
}
|
||||
|
||||
impl<'tcx> CodegenUnitNameBuilder<'tcx> {
|
||||
|
|
|
@ -207,8 +207,8 @@
|
|||
|
||||
mod move_check;
|
||||
|
||||
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
|
||||
use rustc_data_structures::sync::{par_for_each_in, LRef, MTLock};
|
||||
use rustc_data_structures::unord::{UnordMap, UnordSet};
|
||||
use rustc_hir as hir;
|
||||
use rustc_hir::def::DefKind;
|
||||
use rustc_hir::def_id::{DefId, DefIdMap, LocalDefId};
|
||||
|
@ -251,10 +251,10 @@ pub enum MonoItemCollectionStrategy {
|
|||
|
||||
pub struct UsageMap<'tcx> {
|
||||
// Maps every mono item to the mono items used by it.
|
||||
used_map: FxHashMap<MonoItem<'tcx>, Vec<MonoItem<'tcx>>>,
|
||||
used_map: UnordMap<MonoItem<'tcx>, Vec<MonoItem<'tcx>>>,
|
||||
|
||||
// Maps every mono item to the mono items that use it.
|
||||
user_map: FxHashMap<MonoItem<'tcx>, Vec<MonoItem<'tcx>>>,
|
||||
user_map: UnordMap<MonoItem<'tcx>, Vec<MonoItem<'tcx>>>,
|
||||
}
|
||||
|
||||
type MonoItems<'tcx> = Vec<Spanned<MonoItem<'tcx>>>;
|
||||
|
@ -262,10 +262,10 @@ type MonoItems<'tcx> = Vec<Spanned<MonoItem<'tcx>>>;
|
|||
/// The state that is shared across the concurrent threads that are doing collection.
|
||||
struct SharedState<'tcx> {
|
||||
/// Items that have been or are currently being recursively collected.
|
||||
visited: MTLock<FxHashSet<MonoItem<'tcx>>>,
|
||||
visited: MTLock<UnordSet<MonoItem<'tcx>>>,
|
||||
/// Items that have been or are currently being recursively treated as "mentioned", i.e., their
|
||||
/// consts are evaluated but nothing is added to the collection.
|
||||
mentioned: MTLock<FxHashSet<MonoItem<'tcx>>>,
|
||||
mentioned: MTLock<UnordSet<MonoItem<'tcx>>>,
|
||||
/// Which items are being used where, for better errors.
|
||||
usage_map: MTLock<UsageMap<'tcx>>,
|
||||
}
|
||||
|
@ -290,7 +290,7 @@ enum CollectionMode {
|
|||
|
||||
impl<'tcx> UsageMap<'tcx> {
|
||||
fn new() -> UsageMap<'tcx> {
|
||||
UsageMap { used_map: FxHashMap::default(), user_map: FxHashMap::default() }
|
||||
UsageMap { used_map: Default::default(), user_map: Default::default() }
|
||||
}
|
||||
|
||||
fn record_used<'a>(
|
||||
|
@ -668,7 +668,7 @@ struct MirUsedCollector<'a, 'tcx> {
|
|||
used_items: &'a mut MonoItems<'tcx>,
|
||||
/// See the comment in `collect_items_of_instance` for the purpose of this set.
|
||||
/// Note that this contains *not-monomorphized* items!
|
||||
used_mentioned_items: &'a mut FxHashSet<MentionedItem<'tcx>>,
|
||||
used_mentioned_items: &'a mut UnordSet<MentionedItem<'tcx>>,
|
||||
instance: Instance<'tcx>,
|
||||
visiting_call_terminator: bool,
|
||||
move_check: move_check::MoveCheckState,
|
||||
|
@ -1272,7 +1272,7 @@ fn collect_items_of_instance<'tcx>(
|
|||
// mentioned item. So instead we collect all pre-monomorphized `MentionedItem` that were already
|
||||
// added to `used_items` in a hash set, which can efficiently query in the
|
||||
// `body.mentioned_items` loop below without even having to monomorphize the item.
|
||||
let mut used_mentioned_items = FxHashSet::<MentionedItem<'tcx>>::default();
|
||||
let mut used_mentioned_items = Default::default();
|
||||
let mut collector = MirUsedCollector {
|
||||
tcx,
|
||||
body,
|
||||
|
@ -1628,10 +1628,10 @@ fn create_mono_items_for_default_impls<'tcx>(
|
|||
//=-----------------------------------------------------------------------------
|
||||
|
||||
#[instrument(skip(tcx, strategy), level = "debug")]
|
||||
pub fn collect_crate_mono_items(
|
||||
tcx: TyCtxt<'_>,
|
||||
pub(crate) fn collect_crate_mono_items<'tcx>(
|
||||
tcx: TyCtxt<'tcx>,
|
||||
strategy: MonoItemCollectionStrategy,
|
||||
) -> (FxHashSet<MonoItem<'_>>, UsageMap<'_>) {
|
||||
) -> (Vec<MonoItem<'tcx>>, UsageMap<'tcx>) {
|
||||
let _prof_timer = tcx.prof.generic_activity("monomorphization_collector");
|
||||
|
||||
let roots = tcx
|
||||
|
@ -1641,8 +1641,8 @@ pub fn collect_crate_mono_items(
|
|||
debug!("building mono item graph, beginning at roots");
|
||||
|
||||
let mut state = SharedState {
|
||||
visited: MTLock::new(FxHashSet::default()),
|
||||
mentioned: MTLock::new(FxHashSet::default()),
|
||||
visited: MTLock::new(UnordSet::default()),
|
||||
mentioned: MTLock::new(UnordSet::default()),
|
||||
usage_map: MTLock::new(UsageMap::new()),
|
||||
};
|
||||
let recursion_limit = tcx.recursion_limit();
|
||||
|
@ -1665,5 +1665,11 @@ pub fn collect_crate_mono_items(
|
|||
});
|
||||
}
|
||||
|
||||
(state.visited.into_inner(), state.usage_map.into_inner())
|
||||
// The set of MonoItems was created in an inherently indeterministic order because
|
||||
// of parallelism. We sort it here to ensure that the output is deterministic.
|
||||
let mono_items = tcx.with_stable_hashing_context(move |ref hcx| {
|
||||
state.visited.into_inner().into_sorted(hcx, true)
|
||||
});
|
||||
|
||||
(mono_items, state.usage_map.into_inner())
|
||||
}
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
#![feature(array_windows)]
|
||||
#![feature(is_sorted)]
|
||||
#![allow(rustc::potential_query_instability)]
|
||||
|
||||
use rustc_hir::lang_items::LangItem;
|
||||
use rustc_middle::bug;
|
||||
|
|
|
@ -98,8 +98,9 @@ use std::fs::{self, File};
|
|||
use std::io::{BufWriter, Write};
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
|
||||
use rustc_data_structures::fx::{FxIndexMap, FxIndexSet};
|
||||
use rustc_data_structures::sync;
|
||||
use rustc_data_structures::unord::{UnordMap, UnordSet};
|
||||
use rustc_hir::def::DefKind;
|
||||
use rustc_hir::def_id::{DefId, DefIdSet, LOCAL_CRATE};
|
||||
use rustc_hir::definitions::DefPathDataName;
|
||||
|
@ -131,7 +132,7 @@ struct PlacedMonoItems<'tcx> {
|
|||
/// The codegen units, sorted by name to make things deterministic.
|
||||
codegen_units: Vec<CodegenUnit<'tcx>>,
|
||||
|
||||
internalization_candidates: FxHashSet<MonoItem<'tcx>>,
|
||||
internalization_candidates: UnordSet<MonoItem<'tcx>>,
|
||||
}
|
||||
|
||||
// The output CGUs are sorted by name.
|
||||
|
@ -197,9 +198,9 @@ fn place_mono_items<'tcx, I>(cx: &PartitioningCx<'_, 'tcx>, mono_items: I) -> Pl
|
|||
where
|
||||
I: Iterator<Item = MonoItem<'tcx>>,
|
||||
{
|
||||
let mut codegen_units = FxHashMap::default();
|
||||
let mut codegen_units = UnordMap::default();
|
||||
let is_incremental_build = cx.tcx.sess.opts.incremental.is_some();
|
||||
let mut internalization_candidates = FxHashSet::default();
|
||||
let mut internalization_candidates = UnordSet::default();
|
||||
|
||||
// Determine if monomorphizations instantiated in this crate will be made
|
||||
// available to downstream crates. This depends on whether we are in
|
||||
|
@ -209,7 +210,7 @@ where
|
|||
cx.tcx.sess.opts.share_generics() && cx.tcx.local_crate_exports_generics();
|
||||
|
||||
let cgu_name_builder = &mut CodegenUnitNameBuilder::new(cx.tcx);
|
||||
let cgu_name_cache = &mut FxHashMap::default();
|
||||
let cgu_name_cache = &mut UnordMap::default();
|
||||
|
||||
for mono_item in mono_items {
|
||||
// Handle only root (GloballyShared) items directly here. Inlined (LocalCopy) items
|
||||
|
@ -260,7 +261,7 @@ where
|
|||
// going via another root item. This includes drop-glue, functions from
|
||||
// external crates, and local functions the definition of which is
|
||||
// marked with `#[inline]`.
|
||||
let mut reachable_inlined_items = FxHashSet::default();
|
||||
let mut reachable_inlined_items = FxIndexSet::default();
|
||||
get_reachable_inlined_items(cx.tcx, mono_item, cx.usage_map, &mut reachable_inlined_items);
|
||||
|
||||
// Add those inlined items. It's possible an inlined item is reachable
|
||||
|
@ -284,8 +285,9 @@ where
|
|||
codegen_units.insert(cgu_name, CodegenUnit::new(cgu_name));
|
||||
}
|
||||
|
||||
let mut codegen_units: Vec<_> = codegen_units.into_values().collect();
|
||||
codegen_units.sort_by(|a, b| a.name().as_str().cmp(b.name().as_str()));
|
||||
let mut codegen_units: Vec<_> = cx.tcx.with_stable_hashing_context(|ref hcx| {
|
||||
codegen_units.into_items().map(|(_, cgu)| cgu).collect_sorted(hcx, true)
|
||||
});
|
||||
|
||||
for cgu in codegen_units.iter_mut() {
|
||||
cgu.compute_size_estimate();
|
||||
|
@ -297,7 +299,7 @@ where
|
|||
tcx: TyCtxt<'tcx>,
|
||||
item: MonoItem<'tcx>,
|
||||
usage_map: &UsageMap<'tcx>,
|
||||
visited: &mut FxHashSet<MonoItem<'tcx>>,
|
||||
visited: &mut FxIndexSet<MonoItem<'tcx>>,
|
||||
) {
|
||||
usage_map.for_each_inlined_used_item(tcx, item, |inlined_item| {
|
||||
let is_new = visited.insert(inlined_item);
|
||||
|
@ -320,7 +322,7 @@ fn merge_codegen_units<'tcx>(
|
|||
assert!(codegen_units.is_sorted_by(|a, b| a.name().as_str() <= b.name().as_str()));
|
||||
|
||||
// This map keeps track of what got merged into what.
|
||||
let mut cgu_contents: FxHashMap<Symbol, Vec<Symbol>> =
|
||||
let mut cgu_contents: UnordMap<Symbol, Vec<Symbol>> =
|
||||
codegen_units.iter().map(|cgu| (cgu.name(), vec![cgu.name()])).collect();
|
||||
|
||||
// If N is the maximum number of CGUs, and the CGUs are sorted from largest
|
||||
|
@ -422,22 +424,24 @@ fn merge_codegen_units<'tcx>(
|
|||
// For CGUs that contain the code of multiple modules because of the
|
||||
// merging done above, we use a concatenation of the names of all
|
||||
// contained CGUs.
|
||||
let new_cgu_names: FxHashMap<Symbol, String> = cgu_contents
|
||||
.into_iter()
|
||||
let new_cgu_names = UnordMap::from(
|
||||
cgu_contents
|
||||
.items()
|
||||
// This `filter` makes sure we only update the name of CGUs that
|
||||
// were actually modified by merging.
|
||||
.filter(|(_, cgu_contents)| cgu_contents.len() > 1)
|
||||
.map(|(current_cgu_name, cgu_contents)| {
|
||||
let mut cgu_contents: Vec<&str> = cgu_contents.iter().map(|s| s.as_str()).collect();
|
||||
let mut cgu_contents: Vec<&str> =
|
||||
cgu_contents.iter().map(|s| s.as_str()).collect();
|
||||
|
||||
// Sort the names, so things are deterministic and easy to
|
||||
// predict. We are sorting primitive `&str`s here so we can
|
||||
// use unstable sort.
|
||||
cgu_contents.sort_unstable();
|
||||
|
||||
(current_cgu_name, cgu_contents.join("--"))
|
||||
})
|
||||
.collect();
|
||||
(*current_cgu_name, cgu_contents.join("--"))
|
||||
}),
|
||||
);
|
||||
|
||||
for cgu in codegen_units.iter_mut() {
|
||||
if let Some(new_cgu_name) = new_cgu_names.get(&cgu.name()) {
|
||||
|
@ -511,7 +515,7 @@ fn compute_inlined_overlap<'tcx>(cgu1: &CodegenUnit<'tcx>, cgu2: &CodegenUnit<'t
|
|||
fn internalize_symbols<'tcx>(
|
||||
cx: &PartitioningCx<'_, 'tcx>,
|
||||
codegen_units: &mut [CodegenUnit<'tcx>],
|
||||
internalization_candidates: FxHashSet<MonoItem<'tcx>>,
|
||||
internalization_candidates: UnordSet<MonoItem<'tcx>>,
|
||||
) {
|
||||
/// For symbol internalization, we need to know whether a symbol/mono-item
|
||||
/// is used from outside the codegen unit it is defined in. This type is
|
||||
|
@ -522,7 +526,7 @@ fn internalize_symbols<'tcx>(
|
|||
MultipleCgus,
|
||||
}
|
||||
|
||||
let mut mono_item_placements = FxHashMap::default();
|
||||
let mut mono_item_placements = UnordMap::default();
|
||||
let single_codegen_unit = codegen_units.len() == 1;
|
||||
|
||||
if !single_codegen_unit {
|
||||
|
@ -739,7 +743,7 @@ fn mono_item_linkage_and_visibility<'tcx>(
|
|||
(Linkage::External, vis)
|
||||
}
|
||||
|
||||
type CguNameCache = FxHashMap<(DefId, bool), Symbol>;
|
||||
type CguNameCache = UnordMap<(DefId, bool), Symbol>;
|
||||
|
||||
fn static_visibility<'tcx>(
|
||||
tcx: TyCtxt<'tcx>,
|
||||
|
@ -932,7 +936,7 @@ fn debug_dump<'a, 'tcx: 'a>(tcx: TyCtxt<'tcx>, label: &str, cgus: &[CodegenUnit<
|
|||
//
|
||||
// Also, unreached inlined items won't be counted here. This is fine.
|
||||
|
||||
let mut inlined_items = FxHashSet::default();
|
||||
let mut inlined_items = UnordSet::default();
|
||||
|
||||
let mut root_items = 0;
|
||||
let mut unique_inlined_items = 0;
|
||||
|
@ -1164,7 +1168,7 @@ fn collect_and_partition_mono_items(tcx: TyCtxt<'_>, (): ()) -> (&DefIdSet, &[Co
|
|||
}
|
||||
|
||||
if tcx.sess.opts.unstable_opts.print_mono_items.is_some() {
|
||||
let mut item_to_cgus: FxHashMap<_, Vec<_>> = Default::default();
|
||||
let mut item_to_cgus: UnordMap<_, Vec<_>> = Default::default();
|
||||
|
||||
for cgu in codegen_units {
|
||||
for (&mono_item, &data) in cgu.items() {
|
||||
|
@ -1240,7 +1244,7 @@ fn dump_mono_items_stats<'tcx>(
|
|||
let mut file = BufWriter::new(file);
|
||||
|
||||
// Gather instantiated mono items grouped by def_id
|
||||
let mut items_per_def_id: FxHashMap<_, Vec<_>> = Default::default();
|
||||
let mut items_per_def_id: FxIndexMap<_, Vec<_>> = Default::default();
|
||||
for cgu in codegen_units {
|
||||
cgu.items()
|
||||
.keys()
|
||||
|
|
Loading…
Reference in New Issue