[LTO] Implement parallel Codegen for LTO using splitCodeGen.

The parallelism level can be chosen using the new --lto-jobs=K option,
where K is the number of threads used for CodeGen. K currently
defaults to 1.

llvm-svn: 266484
This commit is contained in:
Davide Italiano 2016-04-15 22:38:10 +00:00
parent b5e4804aee
commit bc176631cd
8 changed files with 95 additions and 38 deletions

View File

@ -94,6 +94,7 @@ struct Configuration {
ELFKind EKind = ELFNoneKind;
uint16_t EMachine = llvm::ELF::EM_NONE;
uint64_t EntryAddr = -1;
unsigned LtoJobs;
unsigned LtoO;
unsigned Optimize;
};

View File

@ -321,6 +321,9 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) {
Config->LtoO = getInteger(Args, OPT_lto_O, 2);
if (Config->LtoO > 3)
error("invalid optimization level for LTO: " + getString(Args, OPT_lto_O));
Config->LtoJobs = getInteger(Args, OPT_lto_jobs, 1);
if (Config->LtoJobs == 0)
error("number of threads must be > 0");
Config->ZExecStack = hasZOption(Args, "execstack");
Config->ZNodelete = hasZOption(Args, "nodelete");

View File

@ -16,6 +16,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/CodeGen/CommandFlags.h"
#include "llvm/CodeGen/ParallelCG.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Linker/IRMover.h"
#include "llvm/Support/StringSaver.h"
@ -33,10 +34,13 @@ using namespace lld;
using namespace lld::elf;
// This is for use when debugging LTO.
// Writes Buffer to a file whose name is Config->OutputFile followed by
// ".lto.o". In the three-argument form, the decimal index I is inserted
// between the output file name and the ".lto.o" suffix when Many is true,
// so that several emitted objects do not share one file name.
static void saveLtoObjectFile(StringRef Buffer) {
static void saveLtoObjectFile(StringRef Buffer, unsigned I, bool Many) {
SmallString<128> Filename = Config->OutputFile;
if (Many)
Filename += utostr(I);
Filename += ".lto.o";
std::error_code EC;
raw_fd_ostream OS(Config->OutputFile.str() + ".lto.o", EC,
sys::fs::OpenFlags::F_None);
raw_fd_ostream OS(Filename, EC, sys::fs::OpenFlags::F_None);
// EC is filled in by the stream constructor; check() is defined elsewhere
// (presumably it reports a failure to open the file -- confirm).
check(EC);
OS << Buffer;
}
@ -136,9 +140,36 @@ static void internalize(GlobalValue &GV) {
GV.setLinkage(GlobalValue::InternalLinkage);
}
std::vector<std::unique_ptr<InputFile>> BitcodeCompiler::runSplitCodegen() {
unsigned NumThreads = Config->LtoJobs;
OwningData.resize(NumThreads);
std::list<raw_svector_ostream> OSs;
std::vector<raw_pwrite_stream *> OSPtrs;
for (SmallString<0> &Obj : OwningData) {
OSs.emplace_back(Obj);
OSPtrs.push_back(&OSs.back());
}
splitCodeGen(std::move(Combined), OSPtrs, {},
[this]() { return getTargetMachine(); });
std::vector<std::unique_ptr<InputFile>> ObjFiles;
for (SmallString<0> &Obj : OwningData)
ObjFiles.push_back(createObjectFile(
MemoryBufferRef(Obj, "LLD-INTERNAL-combined-lto-object")));
if (Config->SaveTemps)
for (unsigned I = 0; I < NumThreads; ++I)
saveLtoObjectFile(OwningData[I], I, NumThreads > 1);
return ObjFiles;
}
// Merge all the bitcode files we have seen, codegen the result
// and return the resulting ObjectFile(s).
std::unique_ptr<InputFile> BitcodeCompiler::compile() {
std::vector<std::unique_ptr<InputFile>> BitcodeCompiler::compile() {
TheTriple = Combined->getTargetTriple();
for (const auto &Name : InternalizedSyms) {
GlobalValue *GV = Combined->getNamedValue(Name.first());
assert(GV);
@ -151,26 +182,16 @@ std::unique_ptr<InputFile> BitcodeCompiler::compile() {
std::unique_ptr<TargetMachine> TM(getTargetMachine());
runLTOPasses(*Combined, *TM);
raw_svector_ostream OS(OwningData);
legacy::PassManager CodeGenPasses;
if (TM->addPassesToEmitFile(CodeGenPasses, OS,
TargetMachine::CGFT_ObjectFile))
fatal("failed to setup codegen");
CodeGenPasses.run(*Combined);
MB = MemoryBuffer::getMemBuffer(OwningData,
"LLD-INTERNAL-combined-lto-object", false);
if (Config->SaveTemps)
saveLtoObjectFile(MB->getBuffer());
return createObjectFile(*MB);
return runSplitCodegen();
}
// Creates a TargetMachine for the target triple of the combined module.
// Looks the target up in the TargetRegistry and calls fatal() with the
// registry's message if none is found. The relocation model is PIC when
// Config->Pic is set and static otherwise; target options come from
// InitTargetOptionsFromCodeGenFlags().
TargetMachine *BitcodeCompiler::getTargetMachine() {
StringRef TripleStr = Combined->getTargetTriple();
std::unique_ptr<TargetMachine> BitcodeCompiler::getTargetMachine() {
std::string Msg;
const Target *T = TargetRegistry::lookupTarget(TripleStr, Msg);
// The unique_ptr-returning form reads the triple from TheTriple, which
// compile() copies out of Combined; runSplitCodegen moves Combined away
// before its factory lambda calls this function.
const Target *T = TargetRegistry::lookupTarget(TheTriple, Msg);
if (!T)
fatal("target not found: " + Msg);
TargetOptions Options = InitTargetOptionsFromCodeGenFlags();
Reloc::Model R = Config->Pic ? Reloc::PIC_ : Reloc::Static;
return T->createTargetMachine(TripleStr, "", "", Options, R);
return std::unique_ptr<TargetMachine>(
T->createTargetMachine(TheTriple, "", "", Options, R));
}

View File

@ -37,20 +37,22 @@ class InputFile;
// Accumulates bitcode files into a single combined module and compiles that
// module to native object file(s) for the linker.
class BitcodeCompiler {
public:
// Registers one bitcode file with the compiler (its definition is not part
// of this view; callers invoke it once per BitcodeFile before compile()).
void add(BitcodeFile &F);
// Merges everything added so far, runs codegen and returns the result.
std::unique_ptr<InputFile> compile();
std::vector<std::unique_ptr<InputFile>> compile();
// Starts with an empty module named "ld-temp.o" and an IRMover bound to it.
BitcodeCompiler()
: Combined(new llvm::Module("ld-temp.o", Context)), Mover(*Combined) {}
private:
llvm::TargetMachine *getTargetMachine();
// Runs splitCodeGen over Combined, producing one object per LTO job.
std::vector<std::unique_ptr<InputFile>> runSplitCodegen();
// Builds a TargetMachine for TheTriple.
std::unique_ptr<llvm::TargetMachine> getTargetMachine();
llvm::LLVMContext Context;
// The module that codegen is run on.
std::unique_ptr<llvm::Module> Combined;
llvm::IRMover Mover;
// Backing storage for the generated object code; the vector form holds one
// buffer per LTO job.
SmallString<0> OwningData;
std::vector<SmallString<0>> OwningData;
std::unique_ptr<MemoryBuffer> MB;
// Names that compile() looks up in Combined via getNamedValue().
llvm::StringSet<> InternalizedSyms;
// Target triple of Combined, captured at the start of compile().
std::string TheTriple;
};
}
}

View File

@ -237,7 +237,9 @@ def G : Separate<["-"], "G">;
// Aliases for ignored options
def alias_version_script_version_script : Joined<["--"], "version-script=">, Alias<version_script>;
// Debugging/developer options
// LTO-related options.
def lto_jobs : Joined<["--"], "lto-jobs=">,
HelpText<"Number of threads to run codegen">;
def disable_verify : Flag<["-"], "disable-verify">;
def mllvm : Separate<["-"], "mllvm">;
def save_temps : Flag<["-"], "save-temps">;

View File

@ -122,24 +122,27 @@ template <class ELFT> void SymbolTable<ELFT>::addCombinedLtoObject() {
Lto.reset(new BitcodeCompiler);
for (const std::unique_ptr<BitcodeFile> &F : BitcodeFiles)
Lto->add(*F);
std::unique_ptr<InputFile> IF = Lto->compile();
ObjectFile<ELFT> *Obj = cast<ObjectFile<ELFT>>(IF.release());
std::vector<std::unique_ptr<InputFile>> IFs = Lto->compile();
// Replace bitcode symbols.
llvm::DenseSet<StringRef> DummyGroups;
Obj->parse(DummyGroups);
for (SymbolBody *Body : Obj->getNonLocalSymbols()) {
Symbol *Sym = insert(Body);
Sym->Body->setUsedInRegularObj();
if (Sym->Body->isShared())
Sym->Body->MustBeInDynSym = true;
if (Sym->Body->MustBeInDynSym)
Body->MustBeInDynSym = true;
if (!Sym->Body->isUndefined() && Body->isUndefined())
continue;
Sym->Body = Body;
for (auto &IF : IFs) {
ObjectFile<ELFT> *Obj = cast<ObjectFile<ELFT>>(IF.release());
llvm::DenseSet<StringRef> DummyGroups;
Obj->parse(DummyGroups);
for (SymbolBody *Body : Obj->getNonLocalSymbols()) {
Symbol *Sym = insert(Body);
Sym->Body->setUsedInRegularObj();
if (Sym->Body->isShared())
Sym->Body->MustBeInDynSym = true;
if (Sym->Body->MustBeInDynSym)
Body->MustBeInDynSym = true;
if (!Sym->Body->isUndefined() && Body->isUndefined())
continue;
Sym->Body = Body;
}
ObjectFiles.emplace_back(Obj);
}
ObjectFiles.emplace_back(Obj);
}
// Add an undefined symbol.

View File

@ -214,3 +214,6 @@ _start:
# RUN: not ld.lld %t -o %t -m wrong_emul 2>&1 | FileCheck --check-prefix=UNKNOWN_EMUL %s
# UNKNOWN_EMUL: unknown emulation: wrong_emul
# RUN: not ld.lld %t --lto-jobs=0 2>&1 | FileCheck --check-prefix=NOTHREADS %s
# NOTHREADS: number of threads must be > 0

View File

@ -0,0 +1,22 @@
; RUN: llvm-as -o %t.bc %s
; RUN: ld.lld -m elf_x86_64 --lto-jobs=2 -save-temps -o %t %t.bc -shared
; RUN: llvm-nm %t0.lto.o | FileCheck --check-prefix=CHECK0 %s
; RUN: llvm-nm %t1.lto.o | FileCheck --check-prefix=CHECK1 %s
target triple = "x86_64-unknown-linux-gnu"
; CHECK0-NOT: bar
; CHECK0: T foo
; CHECK0-NOT: bar
define void @foo() {
call void @bar()
ret void
}
; CHECK1-NOT: foo
; CHECK1: T bar
; CHECK1-NOT: foo
define void @bar() {
call void @foo()
ret void
}