Auto merge of #125928 - michaelwoerister:fix-cgu-hashstable, r=oli-obk

Stabilize order of MonoItems in CGUs and disallow query_instability lint for rustc_monomorphize

The HashStable impl for `CodegenUnit` was incorrect as described in [MCP 533](https://github.com/rust-lang/compiler-team/issues/533). This PR removes any indeterminism from the way codegen units are built. The changes are pretty straightforward.

Part of https://github.com/rust-lang/rust/issues/84447 and [MCP 533](https://github.com/rust-lang/compiler-team/issues/533).
This commit is contained in:
bors 2024-06-07 04:02:59 +00:00
commit b74702fbb2
4 changed files with 74 additions and 74 deletions

View File

@ -5,9 +5,9 @@ use rustc_data_structures::base_n::BaseNString;
use rustc_data_structures::base_n::ToBaseN;
use rustc_data_structures::base_n::CASE_INSENSITIVE;
use rustc_data_structures::fingerprint::Fingerprint;
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::fx::FxIndexMap;
use rustc_data_structures::stable_hasher::{Hash128, HashStable, StableHasher};
use rustc_data_structures::stable_hasher::{Hash128, HashStable, StableHasher, ToStableHashKey};
use rustc_data_structures::unord::UnordMap;
use rustc_hir::def_id::{CrateNum, DefId, LOCAL_CRATE};
use rustc_hir::ItemId;
use rustc_index::Idx;
@ -241,7 +241,17 @@ impl<'tcx> fmt::Display for MonoItem<'tcx> {
}
}
#[derive(Debug)]
impl ToStableHashKey<StableHashingContext<'_>> for MonoItem<'_> {
type KeyType = Fingerprint;
fn to_stable_hash_key(&self, hcx: &StableHashingContext<'_>) -> Self::KeyType {
let mut hasher = StableHasher::new();
self.hash_stable(&mut hcx.clone(), &mut hasher);
hasher.finish()
}
}
#[derive(Debug, HashStable)]
pub struct CodegenUnit<'tcx> {
/// A name for this CGU. Incremental compilation requires that
/// name be unique amongst **all** crates. Therefore, it should
@ -430,38 +440,19 @@ impl<'tcx> CodegenUnit<'tcx> {
}
}
impl<'a, 'tcx> HashStable<StableHashingContext<'a>> for CodegenUnit<'tcx> {
fn hash_stable(&self, hcx: &mut StableHashingContext<'a>, hasher: &mut StableHasher) {
let CodegenUnit {
ref items,
name,
// The size estimate is not relevant to the hash
size_estimate: _,
primary: _,
is_code_coverage_dead_code_cgu,
} = *self;
impl ToStableHashKey<StableHashingContext<'_>> for CodegenUnit<'_> {
type KeyType = String;
name.hash_stable(hcx, hasher);
is_code_coverage_dead_code_cgu.hash_stable(hcx, hasher);
let mut items: Vec<(Fingerprint, _)> = items
.iter()
.map(|(mono_item, &attrs)| {
let mut hasher = StableHasher::new();
mono_item.hash_stable(hcx, &mut hasher);
let mono_item_fingerprint = hasher.finish();
(mono_item_fingerprint, attrs)
})
.collect();
items.sort_unstable_by_key(|i| i.0);
items.hash_stable(hcx, hasher);
fn to_stable_hash_key(&self, _: &StableHashingContext<'_>) -> Self::KeyType {
// Codegen unit names are conceptually required to be stable across
// compilation session so that object file names match up.
self.name.to_string()
}
}
pub struct CodegenUnitNameBuilder<'tcx> {
tcx: TyCtxt<'tcx>,
cache: FxHashMap<CrateNum, String>,
cache: UnordMap<CrateNum, String>,
}
impl<'tcx> CodegenUnitNameBuilder<'tcx> {

View File

@ -207,8 +207,8 @@
mod move_check;
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
use rustc_data_structures::sync::{par_for_each_in, LRef, MTLock};
use rustc_data_structures::unord::{UnordMap, UnordSet};
use rustc_hir as hir;
use rustc_hir::def::DefKind;
use rustc_hir::def_id::{DefId, DefIdMap, LocalDefId};
@ -251,10 +251,10 @@ pub enum MonoItemCollectionStrategy {
pub struct UsageMap<'tcx> {
// Maps every mono item to the mono items used by it.
used_map: FxHashMap<MonoItem<'tcx>, Vec<MonoItem<'tcx>>>,
used_map: UnordMap<MonoItem<'tcx>, Vec<MonoItem<'tcx>>>,
// Maps every mono item to the mono items that use it.
user_map: FxHashMap<MonoItem<'tcx>, Vec<MonoItem<'tcx>>>,
user_map: UnordMap<MonoItem<'tcx>, Vec<MonoItem<'tcx>>>,
}
type MonoItems<'tcx> = Vec<Spanned<MonoItem<'tcx>>>;
@ -262,10 +262,10 @@ type MonoItems<'tcx> = Vec<Spanned<MonoItem<'tcx>>>;
/// The state that is shared across the concurrent threads that are doing collection.
struct SharedState<'tcx> {
/// Items that have been or are currently being recursively collected.
visited: MTLock<FxHashSet<MonoItem<'tcx>>>,
visited: MTLock<UnordSet<MonoItem<'tcx>>>,
/// Items that have been or are currently being recursively treated as "mentioned", i.e., their
/// consts are evaluated but nothing is added to the collection.
mentioned: MTLock<FxHashSet<MonoItem<'tcx>>>,
mentioned: MTLock<UnordSet<MonoItem<'tcx>>>,
/// Which items are being used where, for better errors.
usage_map: MTLock<UsageMap<'tcx>>,
}
@ -290,7 +290,7 @@ enum CollectionMode {
impl<'tcx> UsageMap<'tcx> {
fn new() -> UsageMap<'tcx> {
UsageMap { used_map: FxHashMap::default(), user_map: FxHashMap::default() }
UsageMap { used_map: Default::default(), user_map: Default::default() }
}
fn record_used<'a>(
@ -668,7 +668,7 @@ struct MirUsedCollector<'a, 'tcx> {
used_items: &'a mut MonoItems<'tcx>,
/// See the comment in `collect_items_of_instance` for the purpose of this set.
/// Note that this contains *not-monomorphized* items!
used_mentioned_items: &'a mut FxHashSet<MentionedItem<'tcx>>,
used_mentioned_items: &'a mut UnordSet<MentionedItem<'tcx>>,
instance: Instance<'tcx>,
visiting_call_terminator: bool,
move_check: move_check::MoveCheckState,
@ -1272,7 +1272,7 @@ fn collect_items_of_instance<'tcx>(
// mentioned item. So instead we collect all pre-monomorphized `MentionedItem` that were already
// added to `used_items` in a hash set, which can efficiently query in the
// `body.mentioned_items` loop below without even having to monomorphize the item.
let mut used_mentioned_items = FxHashSet::<MentionedItem<'tcx>>::default();
let mut used_mentioned_items = Default::default();
let mut collector = MirUsedCollector {
tcx,
body,
@ -1628,10 +1628,10 @@ fn create_mono_items_for_default_impls<'tcx>(
//=-----------------------------------------------------------------------------
#[instrument(skip(tcx, strategy), level = "debug")]
pub fn collect_crate_mono_items(
tcx: TyCtxt<'_>,
pub(crate) fn collect_crate_mono_items<'tcx>(
tcx: TyCtxt<'tcx>,
strategy: MonoItemCollectionStrategy,
) -> (FxHashSet<MonoItem<'_>>, UsageMap<'_>) {
) -> (Vec<MonoItem<'tcx>>, UsageMap<'tcx>) {
let _prof_timer = tcx.prof.generic_activity("monomorphization_collector");
let roots = tcx
@ -1641,8 +1641,8 @@ pub fn collect_crate_mono_items(
debug!("building mono item graph, beginning at roots");
let mut state = SharedState {
visited: MTLock::new(FxHashSet::default()),
mentioned: MTLock::new(FxHashSet::default()),
visited: MTLock::new(UnordSet::default()),
mentioned: MTLock::new(UnordSet::default()),
usage_map: MTLock::new(UsageMap::new()),
};
let recursion_limit = tcx.recursion_limit();
@ -1665,5 +1665,11 @@ pub fn collect_crate_mono_items(
});
}
(state.visited.into_inner(), state.usage_map.into_inner())
// The set of MonoItems was created in an inherently indeterministic order because
// of parallelism. We sort it here to ensure that the output is deterministic.
let mono_items = tcx.with_stable_hashing_context(move |ref hcx| {
state.visited.into_inner().into_sorted(hcx, true)
});
(mono_items, state.usage_map.into_inner())
}

View File

@ -1,6 +1,5 @@
#![feature(array_windows)]
#![feature(is_sorted)]
#![allow(rustc::potential_query_instability)]
use rustc_hir::lang_items::LangItem;
use rustc_middle::bug;

View File

@ -98,8 +98,9 @@ use std::fs::{self, File};
use std::io::{BufWriter, Write};
use std::path::{Path, PathBuf};
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
use rustc_data_structures::fx::{FxIndexMap, FxIndexSet};
use rustc_data_structures::sync;
use rustc_data_structures::unord::{UnordMap, UnordSet};
use rustc_hir::def::DefKind;
use rustc_hir::def_id::{DefId, DefIdSet, LOCAL_CRATE};
use rustc_hir::definitions::DefPathDataName;
@ -131,7 +132,7 @@ struct PlacedMonoItems<'tcx> {
/// The codegen units, sorted by name to make things deterministic.
codegen_units: Vec<CodegenUnit<'tcx>>,
internalization_candidates: FxHashSet<MonoItem<'tcx>>,
internalization_candidates: UnordSet<MonoItem<'tcx>>,
}
// The output CGUs are sorted by name.
@ -197,9 +198,9 @@ fn place_mono_items<'tcx, I>(cx: &PartitioningCx<'_, 'tcx>, mono_items: I) -> Pl
where
I: Iterator<Item = MonoItem<'tcx>>,
{
let mut codegen_units = FxHashMap::default();
let mut codegen_units = UnordMap::default();
let is_incremental_build = cx.tcx.sess.opts.incremental.is_some();
let mut internalization_candidates = FxHashSet::default();
let mut internalization_candidates = UnordSet::default();
// Determine if monomorphizations instantiated in this crate will be made
// available to downstream crates. This depends on whether we are in
@ -209,7 +210,7 @@ where
cx.tcx.sess.opts.share_generics() && cx.tcx.local_crate_exports_generics();
let cgu_name_builder = &mut CodegenUnitNameBuilder::new(cx.tcx);
let cgu_name_cache = &mut FxHashMap::default();
let cgu_name_cache = &mut UnordMap::default();
for mono_item in mono_items {
// Handle only root (GloballyShared) items directly here. Inlined (LocalCopy) items
@ -260,7 +261,7 @@ where
// going via another root item. This includes drop-glue, functions from
// external crates, and local functions the definition of which is
// marked with `#[inline]`.
let mut reachable_inlined_items = FxHashSet::default();
let mut reachable_inlined_items = FxIndexSet::default();
get_reachable_inlined_items(cx.tcx, mono_item, cx.usage_map, &mut reachable_inlined_items);
// Add those inlined items. It's possible an inlined item is reachable
@ -284,8 +285,9 @@ where
codegen_units.insert(cgu_name, CodegenUnit::new(cgu_name));
}
let mut codegen_units: Vec<_> = codegen_units.into_values().collect();
codegen_units.sort_by(|a, b| a.name().as_str().cmp(b.name().as_str()));
let mut codegen_units: Vec<_> = cx.tcx.with_stable_hashing_context(|ref hcx| {
codegen_units.into_items().map(|(_, cgu)| cgu).collect_sorted(hcx, true)
});
for cgu in codegen_units.iter_mut() {
cgu.compute_size_estimate();
@ -297,7 +299,7 @@ where
tcx: TyCtxt<'tcx>,
item: MonoItem<'tcx>,
usage_map: &UsageMap<'tcx>,
visited: &mut FxHashSet<MonoItem<'tcx>>,
visited: &mut FxIndexSet<MonoItem<'tcx>>,
) {
usage_map.for_each_inlined_used_item(tcx, item, |inlined_item| {
let is_new = visited.insert(inlined_item);
@ -320,7 +322,7 @@ fn merge_codegen_units<'tcx>(
assert!(codegen_units.is_sorted_by(|a, b| a.name().as_str() <= b.name().as_str()));
// This map keeps track of what got merged into what.
let mut cgu_contents: FxHashMap<Symbol, Vec<Symbol>> =
let mut cgu_contents: UnordMap<Symbol, Vec<Symbol>> =
codegen_units.iter().map(|cgu| (cgu.name(), vec![cgu.name()])).collect();
// If N is the maximum number of CGUs, and the CGUs are sorted from largest
@ -422,22 +424,24 @@ fn merge_codegen_units<'tcx>(
// For CGUs that contain the code of multiple modules because of the
// merging done above, we use a concatenation of the names of all
// contained CGUs.
let new_cgu_names: FxHashMap<Symbol, String> = cgu_contents
.into_iter()
let new_cgu_names = UnordMap::from(
cgu_contents
.items()
// This `filter` makes sure we only update the name of CGUs that
// were actually modified by merging.
.filter(|(_, cgu_contents)| cgu_contents.len() > 1)
.map(|(current_cgu_name, cgu_contents)| {
let mut cgu_contents: Vec<&str> = cgu_contents.iter().map(|s| s.as_str()).collect();
let mut cgu_contents: Vec<&str> =
cgu_contents.iter().map(|s| s.as_str()).collect();
// Sort the names, so things are deterministic and easy to
// predict. We are sorting primitive `&str`s here so we can
// use unstable sort.
cgu_contents.sort_unstable();
(current_cgu_name, cgu_contents.join("--"))
})
.collect();
(*current_cgu_name, cgu_contents.join("--"))
}),
);
for cgu in codegen_units.iter_mut() {
if let Some(new_cgu_name) = new_cgu_names.get(&cgu.name()) {
@ -511,7 +515,7 @@ fn compute_inlined_overlap<'tcx>(cgu1: &CodegenUnit<'tcx>, cgu2: &CodegenUnit<'t
fn internalize_symbols<'tcx>(
cx: &PartitioningCx<'_, 'tcx>,
codegen_units: &mut [CodegenUnit<'tcx>],
internalization_candidates: FxHashSet<MonoItem<'tcx>>,
internalization_candidates: UnordSet<MonoItem<'tcx>>,
) {
/// For symbol internalization, we need to know whether a symbol/mono-item
/// is used from outside the codegen unit it is defined in. This type is
@ -522,7 +526,7 @@ fn internalize_symbols<'tcx>(
MultipleCgus,
}
let mut mono_item_placements = FxHashMap::default();
let mut mono_item_placements = UnordMap::default();
let single_codegen_unit = codegen_units.len() == 1;
if !single_codegen_unit {
@ -739,7 +743,7 @@ fn mono_item_linkage_and_visibility<'tcx>(
(Linkage::External, vis)
}
type CguNameCache = FxHashMap<(DefId, bool), Symbol>;
type CguNameCache = UnordMap<(DefId, bool), Symbol>;
fn static_visibility<'tcx>(
tcx: TyCtxt<'tcx>,
@ -932,7 +936,7 @@ fn debug_dump<'a, 'tcx: 'a>(tcx: TyCtxt<'tcx>, label: &str, cgus: &[CodegenUnit<
//
// Also, unreached inlined items won't be counted here. This is fine.
let mut inlined_items = FxHashSet::default();
let mut inlined_items = UnordSet::default();
let mut root_items = 0;
let mut unique_inlined_items = 0;
@ -1164,7 +1168,7 @@ fn collect_and_partition_mono_items(tcx: TyCtxt<'_>, (): ()) -> (&DefIdSet, &[Co
}
if tcx.sess.opts.unstable_opts.print_mono_items.is_some() {
let mut item_to_cgus: FxHashMap<_, Vec<_>> = Default::default();
let mut item_to_cgus: UnordMap<_, Vec<_>> = Default::default();
for cgu in codegen_units {
for (&mono_item, &data) in cgu.items() {
@ -1240,7 +1244,7 @@ fn dump_mono_items_stats<'tcx>(
let mut file = BufWriter::new(file);
// Gather instantiated mono items grouped by def_id
let mut items_per_def_id: FxHashMap<_, Vec<_>> = Default::default();
let mut items_per_def_id: FxIndexMap<_, Vec<_>> = Default::default();
for cgu in codegen_units {
cgu.items()
.keys()