Rollup merge of #125345 - durin42:thin-link-bitcode, r=bjorn3

rustc_codegen_llvm: add support for writing summary bitcode

Typical uses of ThinLTO don't have any use for this as a standalone file, but distributed ThinLTO uses this to make the linker phase more efficient. With clang you'd do something like `clang -flto=thin -fthin-link-bitcode=foo.indexing.o -c foo.c` and then get both foo.o (full of bitcode) and foo.indexing.o (just the summary or index part of the bitcode). That's then usable by a two-stage linking process that's more friendly to distributed build systems like bazel, which is why I'm working on this area.

I talked some to `@teresajohnson` about naming in this area, as things seem to be a little confused between various blog posts and build systems. "bitcode index" and "bitcode summary" tend to be a little too ambiguous, and she tends to use "thin link bitcode" and "minimized bitcode" (which matches the descriptions in LLVM). Since the clang option is thin-link-bitcode, I went with that to try and not add a new spelling in the world.

Per `@dtolnay,` you can work around the lack of this by using `lld --thinlto-index-only` to do the indexing on regular .o files of bitcode, but that is a bit wasteful on actions when we already have all the information in rustc and could just write out the matching minimized bitcode. I didn't test that at all in our infrastructure, because by the time I learned that I already had this patch largely written.
This commit is contained in:
Guillaume Gomez 2024-05-23 23:39:26 +02:00 committed by GitHub
commit 4ee97fc3db
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 107 additions and 18 deletions

View File

@ -200,7 +200,7 @@ fn produce_final_output_artifacts(
// to get rid of it.
for output_type in crate_output.outputs.keys() {
match *output_type {
OutputType::Bitcode => {
OutputType::Bitcode | OutputType::ThinLinkBitcode => {
// Cranelift doesn't have bitcode
// user_wants_bitcode = true;
// // Copy to .bc, but always keep the .0.bc. There is a later

View File

@ -335,6 +335,10 @@ impl ThinBufferMethods for ThinBuffer {
fn data(&self) -> &[u8] {
unimplemented!();
}
fn thin_link_data(&self) -> &[u8] {
unimplemented!();
}
}
pub struct GccContext {
@ -414,7 +418,7 @@ impl WriteBackendMethods for GccCodegenBackend {
back::write::codegen(cgcx, dcx, module, config)
}
fn prepare_thin(_module: ModuleCodegen<Self::Module>) -> (String, Self::ThinBuffer) {
fn prepare_thin(_module: ModuleCodegen<Self::Module>, _emit_summary: bool) -> (String, Self::ThinBuffer) {
unimplemented!();
}

View File

@ -229,9 +229,12 @@ pub(crate) fn run_thin(
thin_lto(cgcx, &dcx, modules, upstream_modules, cached_modules, &symbols_below_threshold)
}
pub(crate) fn prepare_thin(module: ModuleCodegen<ModuleLlvm>) -> (String, ThinBuffer) {
pub(crate) fn prepare_thin(
module: ModuleCodegen<ModuleLlvm>,
emit_summary: bool,
) -> (String, ThinBuffer) {
let name = module.name;
let buffer = ThinBuffer::new(module.module_llvm.llmod(), true);
let buffer = ThinBuffer::new(module.module_llvm.llmod(), true, emit_summary);
(name, buffer)
}
@ -671,9 +674,9 @@ unsafe impl Send for ThinBuffer {}
unsafe impl Sync for ThinBuffer {}
impl ThinBuffer {
pub fn new(m: &llvm::Module, is_thin: bool) -> ThinBuffer {
pub fn new(m: &llvm::Module, is_thin: bool, emit_summary: bool) -> ThinBuffer {
unsafe {
let buffer = llvm::LLVMRustThinLTOBufferCreate(m, is_thin);
let buffer = llvm::LLVMRustThinLTOBufferCreate(m, is_thin, emit_summary);
ThinBuffer(buffer)
}
}
@ -687,6 +690,14 @@ impl ThinBufferMethods for ThinBuffer {
slice::from_raw_parts(ptr, len)
}
}
fn thin_link_data(&self) -> &[u8] {
unsafe {
let ptr = llvm::LLVMRustThinLTOBufferThinLinkDataPtr(self.0) as *const _;
let len = llvm::LLVMRustThinLTOBufferThinLinkDataLen(self.0);
slice::from_raw_parts(ptr, len)
}
}
}
impl Drop for ThinBuffer {

View File

@ -708,13 +708,15 @@ pub(crate) unsafe fn codegen(
// asm from LLVM and use `gcc` to create the object file.
let bc_out = cgcx.output_filenames.temp_path(OutputType::Bitcode, module_name);
let bc_summary_out =
cgcx.output_filenames.temp_path(OutputType::ThinLinkBitcode, module_name);
let obj_out = cgcx.output_filenames.temp_path(OutputType::Object, module_name);
if config.bitcode_needed() {
let _timer = cgcx
.prof
.generic_activity_with_arg("LLVM_module_codegen_make_bitcode", &*module.name);
let thin = ThinBuffer::new(llmod, config.emit_thin_lto);
let thin = ThinBuffer::new(llmod, config.emit_thin_lto, config.emit_thin_lto_summary);
let data = thin.data();
if let Some(bitcode_filename) = bc_out.file_name() {
@ -725,6 +727,25 @@ pub(crate) unsafe fn codegen(
);
}
if config.emit_thin_lto_summary
&& let Some(thin_link_bitcode_filename) = bc_summary_out.file_name()
{
let summary_data = thin.thin_link_data();
cgcx.prof.artifact_size(
"llvm_bitcode_summary",
thin_link_bitcode_filename.to_string_lossy(),
summary_data.len() as u64,
);
let _timer = cgcx.prof.generic_activity_with_arg(
"LLVM_module_codegen_emit_bitcode_summary",
&*module.name,
);
if let Err(err) = fs::write(&bc_summary_out, summary_data) {
dcx.emit_err(WriteBytecode { path: &bc_summary_out, err });
}
}
if config.emit_bc || config.emit_obj == EmitObj::Bitcode {
let _timer = cgcx
.prof

View File

@ -240,8 +240,11 @@ impl WriteBackendMethods for LlvmCodegenBackend {
) -> Result<CompiledModule, FatalError> {
back::write::codegen(cgcx, dcx, module, config)
}
fn prepare_thin(module: ModuleCodegen<Self::Module>) -> (String, Self::ThinBuffer) {
back::lto::prepare_thin(module)
fn prepare_thin(
module: ModuleCodegen<Self::Module>,
emit_summary: bool,
) -> (String, Self::ThinBuffer) {
back::lto::prepare_thin(module, emit_summary)
}
fn serialize_module(module: ModuleCodegen<Self::Module>) -> (String, Self::ModuleBuffer) {
(module.name, back::lto::ModuleBuffer::new(module.module_llvm.llmod()))

View File

@ -2350,10 +2350,16 @@ extern "C" {
#[allow(improper_ctypes)]
pub fn LLVMRustModuleInstructionStats(M: &Module, Str: &RustString);
pub fn LLVMRustThinLTOBufferCreate(M: &Module, is_thin: bool) -> &'static mut ThinLTOBuffer;
pub fn LLVMRustThinLTOBufferCreate(
M: &Module,
is_thin: bool,
emit_summary: bool,
) -> &'static mut ThinLTOBuffer;
pub fn LLVMRustThinLTOBufferFree(M: &'static mut ThinLTOBuffer);
pub fn LLVMRustThinLTOBufferPtr(M: &ThinLTOBuffer) -> *const c_char;
pub fn LLVMRustThinLTOBufferLen(M: &ThinLTOBuffer) -> size_t;
pub fn LLVMRustThinLTOBufferThinLinkDataPtr(M: &ThinLTOBuffer) -> *const c_char;
pub fn LLVMRustThinLTOBufferThinLinkDataLen(M: &ThinLTOBuffer) -> size_t;
pub fn LLVMRustCreateThinLTOData(
Modules: *const ThinLTOModule,
NumModules: c_uint,

View File

@ -107,6 +107,7 @@ pub struct ModuleConfig {
pub emit_asm: bool,
pub emit_obj: EmitObj,
pub emit_thin_lto: bool,
pub emit_thin_lto_summary: bool,
pub bc_cmdline: String,
// Miscellaneous flags. These are mostly copied from command-line
@ -231,6 +232,10 @@ impl ModuleConfig {
),
emit_obj,
emit_thin_lto: sess.opts.unstable_opts.emit_thin_lto,
emit_thin_lto_summary: if_regular!(
sess.opts.output_types.contains_key(&OutputType::ThinLinkBitcode),
false
),
bc_cmdline: sess.target.bitcode_llvm_cmdline.to_string(),
verify_llvm_ir: sess.verify_llvm_ir(),
@ -282,6 +287,7 @@ impl ModuleConfig {
pub fn bitcode_needed(&self) -> bool {
self.emit_bc
|| self.emit_thin_lto_summary
|| self.emit_obj == EmitObj::Bitcode
|| self.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full)
}
@ -629,6 +635,9 @@ fn produce_final_output_artifacts(
// them for making an rlib.
copy_if_one_unit(OutputType::Bitcode, true);
}
OutputType::ThinLinkBitcode => {
copy_if_one_unit(OutputType::ThinLinkBitcode, false);
}
OutputType::LlvmAssembly => {
copy_if_one_unit(OutputType::LlvmAssembly, false);
}
@ -882,7 +891,7 @@ fn execute_optimize_work_item<B: ExtraBackendMethods>(
match lto_type {
ComputedLtoType::No => finish_intra_module_work(cgcx, module, module_config),
ComputedLtoType::Thin => {
let (name, thin_buffer) = B::prepare_thin(module);
let (name, thin_buffer) = B::prepare_thin(module, false);
if let Some(path) = bitcode {
fs::write(&path, thin_buffer.data()).unwrap_or_else(|e| {
panic!("Error writing pre-lto-bitcode file `{}`: {}", path.display(), e);

View File

@ -56,12 +56,16 @@ pub trait WriteBackendMethods: 'static + Sized + Clone {
module: ModuleCodegen<Self::Module>,
config: &ModuleConfig,
) -> Result<CompiledModule, FatalError>;
fn prepare_thin(module: ModuleCodegen<Self::Module>) -> (String, Self::ThinBuffer);
fn prepare_thin(
module: ModuleCodegen<Self::Module>,
want_summary: bool,
) -> (String, Self::ThinBuffer);
fn serialize_module(module: ModuleCodegen<Self::Module>) -> (String, Self::ModuleBuffer);
}
pub trait ThinBufferMethods: Send + Sync {
fn data(&self) -> &[u8];
fn thin_link_data(&self) -> &[u8];
}
pub trait ModuleBufferMethods: Send + Sync {

View File

@ -1488,13 +1488,15 @@ LLVMRustPrepareThinLTOImport(const LLVMRustThinLTOData *Data, LLVMModuleRef M,
// a ThinLTO summary attached.
struct LLVMRustThinLTOBuffer {
std::string data;
std::string thin_link_data;
};
extern "C" LLVMRustThinLTOBuffer*
LLVMRustThinLTOBufferCreate(LLVMModuleRef M, bool is_thin) {
LLVMRustThinLTOBufferCreate(LLVMModuleRef M, bool is_thin, bool emit_summary) {
auto Ret = std::make_unique<LLVMRustThinLTOBuffer>();
{
auto OS = raw_string_ostream(Ret->data);
auto ThinLinkOS = raw_string_ostream(Ret->thin_link_data);
{
if (is_thin) {
PassBuilder PB;
@ -1508,7 +1510,10 @@ LLVMRustThinLTOBufferCreate(LLVMModuleRef M, bool is_thin) {
PB.registerLoopAnalyses(LAM);
PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
ModulePassManager MPM;
MPM.addPass(ThinLTOBitcodeWriterPass(OS, nullptr));
// We only pass ThinLinkOS to be filled in if we want the summary,
// because otherwise LLVM does extra work and may double-emit some
// errors or warnings.
MPM.addPass(ThinLTOBitcodeWriterPass(OS, emit_summary ? &ThinLinkOS : nullptr));
MPM.run(*unwrap(M), MAM);
} else {
WriteBitcodeToFile(*unwrap(M), OS);
@ -1533,6 +1538,16 @@ LLVMRustThinLTOBufferLen(const LLVMRustThinLTOBuffer *Buffer) {
return Buffer->data.length();
}
extern "C" const void*
LLVMRustThinLTOBufferThinLinkDataPtr(const LLVMRustThinLTOBuffer *Buffer) {
return Buffer->thin_link_data.data();
}
extern "C" size_t
LLVMRustThinLTOBufferThinLinkDataLen(const LLVMRustThinLTOBuffer *Buffer) {
return Buffer->thin_link_data.length();
}
// This is what we used to parse upstream bitcode for actual ThinLTO
// processing. We'll call this once per module optimized through ThinLTO, and
// it'll be called concurrently on many threads.

View File

@ -465,6 +465,7 @@ impl FromStr for SplitDwarfKind {
#[derive(Encodable, Decodable)]
pub enum OutputType {
Bitcode,
ThinLinkBitcode,
Assembly,
LlvmAssembly,
Mir,
@ -492,6 +493,7 @@ impl OutputType {
match *self {
OutputType::Exe | OutputType::DepInfo | OutputType::Metadata => true,
OutputType::Bitcode
| OutputType::ThinLinkBitcode
| OutputType::Assembly
| OutputType::LlvmAssembly
| OutputType::Mir
@ -502,6 +504,7 @@ impl OutputType {
pub fn shorthand(&self) -> &'static str {
match *self {
OutputType::Bitcode => "llvm-bc",
OutputType::ThinLinkBitcode => "thin-link-bitcode",
OutputType::Assembly => "asm",
OutputType::LlvmAssembly => "llvm-ir",
OutputType::Mir => "mir",
@ -518,6 +521,7 @@ impl OutputType {
"llvm-ir" => OutputType::LlvmAssembly,
"mir" => OutputType::Mir,
"llvm-bc" => OutputType::Bitcode,
"thin-link-bitcode" => OutputType::ThinLinkBitcode,
"obj" => OutputType::Object,
"metadata" => OutputType::Metadata,
"link" => OutputType::Exe,
@ -528,8 +532,9 @@ impl OutputType {
fn shorthands_display() -> String {
format!(
"`{}`, `{}`, `{}`, `{}`, `{}`, `{}`, `{}`, `{}`",
"`{}`, `{}`, `{}`, `{}`, `{}`, `{}`, `{}`, `{}`, `{}`",
OutputType::Bitcode.shorthand(),
OutputType::ThinLinkBitcode.shorthand(),
OutputType::Assembly.shorthand(),
OutputType::LlvmAssembly.shorthand(),
OutputType::Mir.shorthand(),
@ -543,6 +548,7 @@ impl OutputType {
pub fn extension(&self) -> &'static str {
match *self {
OutputType::Bitcode => "bc",
OutputType::ThinLinkBitcode => "indexing.o",
OutputType::Assembly => "s",
OutputType::LlvmAssembly => "ll",
OutputType::Mir => "mir",
@ -559,9 +565,11 @@ impl OutputType {
| OutputType::LlvmAssembly
| OutputType::Mir
| OutputType::DepInfo => true,
OutputType::Bitcode | OutputType::Object | OutputType::Metadata | OutputType::Exe => {
false
}
OutputType::Bitcode
| OutputType::ThinLinkBitcode
| OutputType::Object
| OutputType::Metadata
| OutputType::Exe => false,
}
}
}
@ -644,6 +652,7 @@ impl OutputTypes {
pub fn should_codegen(&self) -> bool {
self.0.keys().any(|k| match *k {
OutputType::Bitcode
| OutputType::ThinLinkBitcode
| OutputType::Assembly
| OutputType::LlvmAssembly
| OutputType::Mir
@ -657,6 +666,7 @@ impl OutputTypes {
pub fn should_link(&self) -> bool {
self.0.keys().any(|k| match *k {
OutputType::Bitcode
| OutputType::ThinLinkBitcode
| OutputType::Assembly
| OutputType::LlvmAssembly
| OutputType::Mir
@ -1769,6 +1779,12 @@ fn parse_output_types(
display = OutputType::shorthands_display(),
))
});
if output_type == OutputType::ThinLinkBitcode && !unstable_opts.unstable_options {
early_dcx.early_fatal(format!(
"{} requested but -Zunstable-options not specified",
OutputType::ThinLinkBitcode.shorthand()
));
}
output_types.insert(output_type, path);
}
}