forked from OSchip/llvm-project
[LTO] Implement parallel Codegen for LTO using splitCodeGen.
Parallelism level can be chosen using the new --lto-jobs=K option where K is the number of threads used for CodeGen. It currently defaults to 1. llvm-svn: 266484
This commit is contained in:
parent
b5e4804aee
commit
bc176631cd
|
@ -94,6 +94,7 @@ struct Configuration {
|
|||
ELFKind EKind = ELFNoneKind;
|
||||
uint16_t EMachine = llvm::ELF::EM_NONE;
|
||||
uint64_t EntryAddr = -1;
|
||||
unsigned LtoJobs;
|
||||
unsigned LtoO;
|
||||
unsigned Optimize;
|
||||
};
|
||||
|
|
|
@ -321,6 +321,9 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) {
|
|||
Config->LtoO = getInteger(Args, OPT_lto_O, 2);
|
||||
if (Config->LtoO > 3)
|
||||
error("invalid optimization level for LTO: " + getString(Args, OPT_lto_O));
|
||||
Config->LtoJobs = getInteger(Args, OPT_lto_jobs, 1);
|
||||
if (Config->LtoJobs == 0)
|
||||
error("number of threads must be > 0");
|
||||
|
||||
Config->ZExecStack = hasZOption(Args, "execstack");
|
||||
Config->ZNodelete = hasZOption(Args, "nodelete");
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
#include "llvm/Bitcode/ReaderWriter.h"
|
||||
#include "llvm/CodeGen/CommandFlags.h"
|
||||
#include "llvm/CodeGen/ParallelCG.h"
|
||||
#include "llvm/IR/LegacyPassManager.h"
|
||||
#include "llvm/Linker/IRMover.h"
|
||||
#include "llvm/Support/StringSaver.h"
|
||||
|
@ -33,10 +34,13 @@ using namespace lld;
|
|||
using namespace lld::elf;
|
||||
|
||||
// This is for use when debugging LTO.
|
||||
static void saveLtoObjectFile(StringRef Buffer) {
|
||||
static void saveLtoObjectFile(StringRef Buffer, unsigned I, bool Many) {
|
||||
SmallString<128> Filename = Config->OutputFile;
|
||||
if (Many)
|
||||
Filename += utostr(I);
|
||||
Filename += ".lto.o";
|
||||
std::error_code EC;
|
||||
raw_fd_ostream OS(Config->OutputFile.str() + ".lto.o", EC,
|
||||
sys::fs::OpenFlags::F_None);
|
||||
raw_fd_ostream OS(Filename, EC, sys::fs::OpenFlags::F_None);
|
||||
check(EC);
|
||||
OS << Buffer;
|
||||
}
|
||||
|
@ -136,9 +140,36 @@ static void internalize(GlobalValue &GV) {
|
|||
GV.setLinkage(GlobalValue::InternalLinkage);
|
||||
}
|
||||
|
||||
// Generates native code for the combined LTO module and returns the results
// as object files. One output buffer is created per requested job
// (Config->LtoJobs); splitCodeGen fills the buffers, each buffer is then
// wrapped by createObjectFile, and, when Config->SaveTemps is set, each
// buffer is additionally handed to saveLtoObjectFile.
std::vector<std::unique_ptr<InputFile>> BitcodeCompiler::runSplitCodegen() {
|
||||
unsigned NumThreads = Config->LtoJobs;
|
||||
// OwningData is a class member, so the buffers outlive this call; it is
// sized to hold one buffer per job.
OwningData.resize(NumThreads);
|
||||
|
||||
// A std::list is used for the streams so that the addresses stored in
// OSPtrs stay valid while further streams are appended.
std::list<raw_svector_ostream> OSs;
|
||||
std::vector<raw_pwrite_stream *> OSPtrs;
|
||||
for (SmallString<0> &Obj : OwningData) {
|
||||
OSs.emplace_back(Obj);
|
||||
OSPtrs.push_back(&OSs.back());
|
||||
}
|
||||
|
||||
// Combined is moved into splitCodeGen, so it must not be used after this
// point. The callback supplies a target machine via getTargetMachine().
splitCodeGen(std::move(Combined), OSPtrs, {},
|
||||
[this]() { return getTargetMachine(); });
|
||||
|
||||
// Wrap every generated buffer as an input object file; the MemoryBufferRef
// refers to the bytes held in OwningData rather than copying them.
std::vector<std::unique_ptr<InputFile>> ObjFiles;
|
||||
for (SmallString<0> &Obj : OwningData)
|
||||
ObjFiles.push_back(createObjectFile(
|
||||
MemoryBufferRef(Obj, "LLD-INTERNAL-combined-lto-object")));
|
||||
|
||||
// NumThreads > 1 is passed as saveLtoObjectFile's Many flag, which controls
// whether the index I becomes part of the saved file name.
if (Config->SaveTemps)
|
||||
for (unsigned I = 0; I < NumThreads; ++I)
|
||||
saveLtoObjectFile(OwningData[I], I, NumThreads > 1);
|
||||
|
||||
return ObjFiles;
|
||||
}
|
||||
|
||||
// Merge all the bitcode files we have seen, codegen the result
|
||||
// and return the resulting ObjectFile(s).
|
||||
std::unique_ptr<InputFile> BitcodeCompiler::compile() {
|
||||
std::vector<std::unique_ptr<InputFile>> BitcodeCompiler::compile() {
|
||||
TheTriple = Combined->getTargetTriple();
|
||||
for (const auto &Name : InternalizedSyms) {
|
||||
GlobalValue *GV = Combined->getNamedValue(Name.first());
|
||||
assert(GV);
|
||||
|
@ -151,26 +182,16 @@ std::unique_ptr<InputFile> BitcodeCompiler::compile() {
|
|||
std::unique_ptr<TargetMachine> TM(getTargetMachine());
|
||||
runLTOPasses(*Combined, *TM);
|
||||
|
||||
raw_svector_ostream OS(OwningData);
|
||||
legacy::PassManager CodeGenPasses;
|
||||
if (TM->addPassesToEmitFile(CodeGenPasses, OS,
|
||||
TargetMachine::CGFT_ObjectFile))
|
||||
fatal("failed to setup codegen");
|
||||
CodeGenPasses.run(*Combined);
|
||||
MB = MemoryBuffer::getMemBuffer(OwningData,
|
||||
"LLD-INTERNAL-combined-lto-object", false);
|
||||
if (Config->SaveTemps)
|
||||
saveLtoObjectFile(MB->getBuffer());
|
||||
return createObjectFile(*MB);
|
||||
return runSplitCodegen();
|
||||
}
|
||||
|
||||
TargetMachine *BitcodeCompiler::getTargetMachine() {
|
||||
StringRef TripleStr = Combined->getTargetTriple();
|
||||
std::unique_ptr<TargetMachine> BitcodeCompiler::getTargetMachine() {
|
||||
std::string Msg;
|
||||
const Target *T = TargetRegistry::lookupTarget(TripleStr, Msg);
|
||||
const Target *T = TargetRegistry::lookupTarget(TheTriple, Msg);
|
||||
if (!T)
|
||||
fatal("target not found: " + Msg);
|
||||
TargetOptions Options = InitTargetOptionsFromCodeGenFlags();
|
||||
Reloc::Model R = Config->Pic ? Reloc::PIC_ : Reloc::Static;
|
||||
return T->createTargetMachine(TripleStr, "", "", Options, R);
|
||||
return std::unique_ptr<TargetMachine>(
|
||||
T->createTargetMachine(TheTriple, "", "", Options, R));
|
||||
}
|
||||
|
|
|
@ -37,20 +37,22 @@ class InputFile;
|
|||
class BitcodeCompiler {
|
||||
public:
|
||||
void add(BitcodeFile &F);
|
||||
std::unique_ptr<InputFile> compile();
|
||||
std::vector<std::unique_ptr<InputFile>> compile();
|
||||
|
||||
BitcodeCompiler()
|
||||
: Combined(new llvm::Module("ld-temp.o", Context)), Mover(*Combined) {}
|
||||
|
||||
private:
|
||||
llvm::TargetMachine *getTargetMachine();
|
||||
std::vector<std::unique_ptr<InputFile>> runSplitCodegen();
|
||||
std::unique_ptr<llvm::TargetMachine> getTargetMachine();
|
||||
|
||||
llvm::LLVMContext Context;
|
||||
std::unique_ptr<llvm::Module> Combined;
|
||||
llvm::IRMover Mover;
|
||||
SmallString<0> OwningData;
|
||||
std::vector<SmallString<0>> OwningData;
|
||||
std::unique_ptr<MemoryBuffer> MB;
|
||||
llvm::StringSet<> InternalizedSyms;
|
||||
std::string TheTriple;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
@ -237,7 +237,9 @@ def G : Separate<["-"], "G">;
|
|||
// Aliases for ignored options
|
||||
def alias_version_script_version_script : Joined<["--"], "version-script=">, Alias<version_script>;
|
||||
|
||||
// Debugging/developer options
|
||||
// LTO-related options.
|
||||
def lto_jobs : Joined<["--"], "lto-jobs=">,
|
||||
HelpText<"Number of threads to run codegen">;
|
||||
def disable_verify : Flag<["-"], "disable-verify">;
|
||||
def mllvm : Separate<["-"], "mllvm">;
|
||||
def save_temps : Flag<["-"], "save-temps">;
|
||||
|
|
|
@ -122,24 +122,27 @@ template <class ELFT> void SymbolTable<ELFT>::addCombinedLtoObject() {
|
|||
Lto.reset(new BitcodeCompiler);
|
||||
for (const std::unique_ptr<BitcodeFile> &F : BitcodeFiles)
|
||||
Lto->add(*F);
|
||||
std::unique_ptr<InputFile> IF = Lto->compile();
|
||||
ObjectFile<ELFT> *Obj = cast<ObjectFile<ELFT>>(IF.release());
|
||||
std::vector<std::unique_ptr<InputFile>> IFs = Lto->compile();
|
||||
|
||||
// Replace bitcode symbols.
|
||||
llvm::DenseSet<StringRef> DummyGroups;
|
||||
Obj->parse(DummyGroups);
|
||||
for (SymbolBody *Body : Obj->getNonLocalSymbols()) {
|
||||
Symbol *Sym = insert(Body);
|
||||
Sym->Body->setUsedInRegularObj();
|
||||
if (Sym->Body->isShared())
|
||||
Sym->Body->MustBeInDynSym = true;
|
||||
if (Sym->Body->MustBeInDynSym)
|
||||
Body->MustBeInDynSym = true;
|
||||
if (!Sym->Body->isUndefined() && Body->isUndefined())
|
||||
continue;
|
||||
Sym->Body = Body;
|
||||
for (auto &IF : IFs) {
|
||||
ObjectFile<ELFT> *Obj = cast<ObjectFile<ELFT>>(IF.release());
|
||||
|
||||
llvm::DenseSet<StringRef> DummyGroups;
|
||||
Obj->parse(DummyGroups);
|
||||
for (SymbolBody *Body : Obj->getNonLocalSymbols()) {
|
||||
Symbol *Sym = insert(Body);
|
||||
Sym->Body->setUsedInRegularObj();
|
||||
if (Sym->Body->isShared())
|
||||
Sym->Body->MustBeInDynSym = true;
|
||||
if (Sym->Body->MustBeInDynSym)
|
||||
Body->MustBeInDynSym = true;
|
||||
if (!Sym->Body->isUndefined() && Body->isUndefined())
|
||||
continue;
|
||||
Sym->Body = Body;
|
||||
}
|
||||
ObjectFiles.emplace_back(Obj);
|
||||
}
|
||||
ObjectFiles.emplace_back(Obj);
|
||||
}
|
||||
|
||||
// Add an undefined symbol.
|
||||
|
|
|
@ -214,3 +214,6 @@ _start:
|
|||
|
||||
# RUN: not ld.lld %t -o %t -m wrong_emul 2>&1 | FileCheck --check-prefix=UNKNOWN_EMUL %s
|
||||
# UNKNOWN_EMUL: unknown emulation: wrong_emul
|
||||
|
||||
# RUN: not ld.lld %t --lto-jobs=0 2>&1 | FileCheck --check-prefix=NOTHREADS %s
|
||||
# NOTHREADS: number of threads must be > 0
|
||||
|
|
|
@ -0,0 +1,22 @@
|
|||
; RUN: llvm-as -o %t.bc %s
|
||||
; RUN: ld.lld -m elf_x86_64 --lto-jobs=2 -save-temps -o %t %t.bc -shared
|
||||
; RUN: llvm-nm %t0.lto.o | FileCheck --check-prefix=CHECK0 %s
|
||||
; RUN: llvm-nm %t1.lto.o | FileCheck --check-prefix=CHECK1 %s
|
||||
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
; CHECK0-NOT: bar
|
||||
; CHECK0: T foo
|
||||
; CHECK0-NOT: bar
|
||||
define void @foo() {
|
||||
call void @bar()
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK1-NOT: foo
|
||||
; CHECK1: T bar
|
||||
; CHECK1-NOT: foo
|
||||
define void @bar() {
|
||||
call void @foo()
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue