[BOLT] PLT optimization

Summary:
Add an option to optimize PLT calls:

  -plt  - optimize PLT calls (requires linking with -znow)
    =none - do not optimize PLT calls
    =hot  - optimize executed (hot) PLT calls
    =all  - optimize all PLT calls

When optimized, calls to PLT entries are converted into indirect
calls through the corresponding GOT entries. GOT entries are
guaranteed to contain a valid function pointer if lazy binding is
disabled - hence the requirement for the linker's -znow option.

Note: we could add an entry to .dynamic and drop the requirement
for -znow if we moved .dynamic to a new segment.

(cherry picked from FBD5579789)
This commit is contained in:
Maksim Panchenko 2017-08-04 11:21:05 -07:00
parent 0c07445110
commit 49d1f5698d
10 changed files with 349 additions and 74 deletions

View File

@ -149,6 +149,9 @@ public:
/// Number of functions with profile information
uint64_t NumProfiledFuncs{0};
/// True if the binary requires immediate relocation processing.
bool RequiresZNow{false};
BinaryContext(std::unique_ptr<MCContext> Ctx,
std::unique_ptr<DWARFContext> DwCtx,
std::unique_ptr<Triple> TheTriple,
@ -206,6 +209,16 @@ public:
/// Register a symbol with \p Name at a given \p Address.
MCSymbol *registerNameAtAddress(const std::string &Name, uint64_t Address) {
// Check if the Name was already registered.
const auto GSI = GlobalSymbols.find(Name);
if (GSI != GlobalSymbols.end()) {
assert(GSI->second == Address && "addresses do not match");
auto *Symbol = Ctx->lookupSymbol(Name);
assert(Symbol && "symbol should be registered with MCContext");
return Symbol;
}
// Add the name to global symbols map.
GlobalSymbols[Name] = Address;

View File

@ -4289,19 +4289,14 @@ DynoStats BinaryFunction::getDynoStats() const {
if (BC.MIA->getMemoryOperandNo(Instr) != -1) {
Stats[DynoStats::INDIRECT_CALLS] += CallFreq;
} else if (const auto *CallSymbol = BC.MIA->getTargetSymbol(Instr)) {
if (BC.getFunctionForSymbol(CallSymbol))
continue;
auto GSI = BC.GlobalSymbols.find(CallSymbol->getName());
if (GSI == BC.GlobalSymbols.end())
continue;
auto Section = BC.getSectionForAddress(GSI->second);
if (!Section)
continue;
StringRef SectionName;
Section->getName(SectionName);
if (SectionName == ".plt") {
const auto *BF = BC.getFunctionForSymbol(CallSymbol);
if (BF && BF->isPLTFunction())
Stats[DynoStats::PLT_CALLS] += CallFreq;
}
// We don't process PLT functions and hence have to adjust
// relevant dynostats here.
Stats[DynoStats::LOADS] += CallFreq;
Stats[DynoStats::INDIRECT_CALLS] += CallFreq;
}
}

View File

@ -339,13 +339,13 @@ private:
/// is referenced by UnitLineTable.
DWARFUnitLineTable UnitLineTable{nullptr, nullptr};
/// Offset of this function's address ranges in the .debug_ranges section of
/// the output binary.
uint32_t AddressRangesOffset{-1U};
/// Last computed hash value.
mutable uint64_t Hash{0};
/// For PLT functions it contains a symbol associated with a function
/// reference. It is nullptr for non-PLT functions.
const MCSymbol *PLTSymbol{nullptr};
/// Function order for streaming into the destination binary.
uint32_t Index{-1U};
@ -1165,6 +1165,23 @@ public:
return FunctionColdEndLabel;
}
/// Tell whether this function stands for a PLT entry, i.e. whether a PLT
/// reference symbol has been attached to it.
bool isPLTFunction() const {
  const bool HasPLTReference = (PLTSymbol != nullptr);
  return HasPLTReference;
}
/// Return the PLT reference symbol attached to this function, or nullptr if
/// none has been set (i.e. the function is not a PLT function).
const MCSymbol *getPLTSymbol() const {
return PLTSymbol;
}
/// Attach \p Symbol as the PLT reference symbol of this function. A non-null
/// \p Symbol makes isPLTFunction() return true afterwards.
/// Asserts that the function size is 0, as expected for PLT functions.
void setPLTSymbol(const MCSymbol *Symbol) {
assert(Size == 0 && "function size should be 0 for PLT functions");
PLTSymbol = Symbol;
}
/// Register relocation type \p RelType at a given \p Address in the function
/// against \p Symbol.
/// Assert if the \p Address is not inside this function.
@ -1614,9 +1631,6 @@ public:
return *this;
}
/// Returns the offset of the function's address ranges in .debug_ranges.
uint32_t getAddressRangesOffset() const { return AddressRangesOffset; }
/// Return the profile information about the number of times
/// the function was executed.
///

View File

@ -14,6 +14,7 @@
#include "Passes/FrameOptimizer.h"
#include "Passes/IndirectCallPromotion.h"
#include "Passes/Inliner.h"
#include "Passes/PLTCall.h"
#include "Passes/ReorderFunctions.h"
#include "Passes/StokeInfo.h"
#include "llvm/Support/Timer.h"
@ -31,6 +32,7 @@ extern cl::opt<unsigned> Verbosity;
extern cl::opt<bool> PrintAll;
extern cl::opt<bool> PrintDynoStats;
extern cl::opt<bool> DumpDotAll;
extern cl::opt<bolt::PLTCall::OptType> PLT;
static cl::opt<bool>
DynoStatsAll("dyno-stats-all",
@ -131,6 +133,13 @@ PrintOptimizeBodyless("print-optimize-bodyless",
cl::Hidden,
cl::cat(BoltOptCategory));
static cl::opt<bool>
PrintPLT("print-plt",
cl::desc("print functions after PLT optimization"),
cl::ZeroOrMore,
cl::Hidden,
cl::cat(BoltOptCategory));
static cl::opt<bool>
PrintPeepholes("print-peepholes",
cl::desc("print functions after peephole optimization"),
@ -331,6 +340,8 @@ void BinaryFunctionPassManager::runAllPasses(
Manager.registerPass(llvm::make_unique<IdenticalCodeFolding>(PrintICF),
opts::ICF);
Manager.registerPass(llvm::make_unique<PLTCall>(PrintPLT));
Manager.registerPass(llvm::make_unique<ReorderBasicBlocks>(PrintReordered));
Manager.registerPass(llvm::make_unique<Peepholes>(PrintPeepholes),

View File

@ -530,7 +530,8 @@ const uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0;
bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const {
uint64_t Address = Function.getAddress();
auto I = FDEs.find(Address);
if (I == FDEs.end())
// Ignore zero-length FDE ranges.
if (I == FDEs.end() || !I->second->getAddressRange())
return true;
const FDE &CurFDE = *I->second;

View File

@ -13,6 +13,7 @@ add_llvm_library(LLVMBOLTPasses
IndirectCallPromotion.cpp
Inliner.cpp
LivenessAnalysis.cpp
PLTCall.cpp
PettisAndHansen.cpp
RegAnalysis.cpp
ReorderAlgorithm.cpp

94
bolt/Passes/PLTCall.cpp Normal file
View File

@ -0,0 +1,94 @@
//===--- Passes/PLTCall.cpp - PLT call optimization -----------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Replace calls to PLT entries with indirect calls against GOT.
//
//===----------------------------------------------------------------------===//
#include "PLTCall.h"
#include "llvm/Support/Options.h"
#define DEBUG_TYPE "bolt-plt"
using namespace llvm;
namespace opts {
extern cl::OptionCategory BoltOptCategory;
/// Command-line option "-plt" selecting which PLT calls are optimized:
/// "none" (the initial value), "hot", or "all".
cl::opt<bolt::PLTCall::OptType>
PLT("plt",
cl::desc("optimize PLT calls (requires linking with -znow)"),
cl::init(bolt::PLTCall::OT_NONE),
cl::values(clEnumValN(bolt::PLTCall::OT_NONE,
"none",
"do not optimize PLT calls"),
clEnumValN(bolt::PLTCall::OT_HOT,
"hot",
"optimize executed (hot) PLT calls"),
clEnumValN(bolt::PLTCall::OT_ALL,
"all",
"optimize all PLT calls"),
clEnumValEnd),
cl::ZeroOrMore,
cl::cat(BoltOptCategory));
}
namespace llvm {
namespace bolt {
/// Convert direct calls that target PLT functions into indirect calls
/// through the callee's PLT reference symbol.
///
/// Does nothing when the -plt option is "none". In "hot" mode, functions
/// without profile information and basic blocks whose known execution count
/// is zero are skipped; in "all" mode every function accepted by
/// shouldOptimize() is scanned in full.
///
/// If at least one call was converted, BC.RequiresZNow is set and the number
/// of converted calls is reported on stdout.
///
/// \p BC   binary context providing instruction analysis and symbol lookup.
/// \p BFs  map of functions to process.
/// The third parameter is unused by this pass.
void PLTCall::runOnFunctions(
    BinaryContext &BC,
    std::map<uint64_t, BinaryFunction> &BFs,
    std::set<uint64_t> &) {
  if (opts::PLT == OT_NONE)
    return;

  // The optimization mode cannot change while the pass is running.
  const bool HotOnly = (opts::PLT == OT_HOT);

  uint64_t NumCallsOptimized = 0;
  for (auto &It : BFs) {
    auto &Function = It.second;
    if (!shouldOptimize(Function))
      continue;

    if (HotOnly &&
        Function.getExecutionCount() == BinaryFunction::COUNT_NO_PROFILE)
      continue;

    for (auto *BB : Function.layout()) {
      if (HotOnly && !BB->getKnownExecutionCount())
        continue;

      for (auto &Instr : *BB) {
        if (!BC.MIA->isCall(Instr))
          continue;

        const auto *CallSymbol = BC.MIA->getTargetSymbol(Instr);
        if (!CallSymbol)
          continue;

        // Only calls whose target resolves to a PLT function are rewritten.
        const auto *CalleeBF = BC.getFunctionForSymbol(CallSymbol);
        if (!CalleeBF || !CalleeBF->isPLTFunction())
          continue;

        BC.MIA->convertCallToIndirectCall(Instr,
                                          CalleeBF->getPLTSymbol(),
                                          BC.Ctx.get());
        ++NumCallsOptimized;
      }
    }
  }

  if (NumCallsOptimized) {
    // Record that the output binary needs immediate binding.
    BC.RequiresZNow = true;
    outs() << "BOLT-INFO: " << NumCallsOptimized
           << " PLT calls in the binary were optimized.\n";
  }
}
} // namespace bolt
} // namespace llvm

49
bolt/Passes/PLTCall.h Normal file
View File

@ -0,0 +1,49 @@
//===--- Passes/PLTCall.h - PLT call optimization -------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Replace calls to PLT entries with indirect calls against GOT.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TOOLS_LLVM_BOLT_PASSES_PLTCALL_H
#define LLVM_TOOLS_LLVM_BOLT_PASSES_PLTCALL_H
#include "BinaryContext.h"
#include "BinaryFunction.h"
#include "BinaryPasses.h"
namespace llvm {
namespace bolt {
/// Binary function pass that rewrites calls targeting PLT functions into
/// indirect calls (see runOnFunctions()).
class PLTCall : public BinaryFunctionPass {
public:
/// PLT optimization type
enum OptType : char {
OT_NONE = 0, ///< Do not optimize
OT_HOT = 1, ///< Optimize hot PLT calls
OT_ALL = 2 ///< Optimize all PLT calls
};
/// Construct the pass; \p PrintPass is forwarded to BinaryFunctionPass.
explicit PLTCall(const cl::opt<bool> &PrintPass)
: BinaryFunctionPass(PrintPass) { }
/// Human-readable name of the pass.
const char *getName() const override {
return "PLT call optimization";
}
/// Defers to the base-class printing policy for \p BF.
bool shouldPrint(const BinaryFunction &BF) const override {
return BinaryFunctionPass::shouldPrint(BF);
}
/// Run the PLT call optimization over the functions in \p BFs.
void runOnFunctions(BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs,
std::set<uint64_t> &LargeFunctions) override;
};
} // namespace bolt
} // namespace llvm
#endif

View File

@ -961,6 +961,7 @@ void RewriteInstance::discoverFileObjects() {
AlternativeName = uniquifyName(AltPrefix);
}
// Register names even if it's not a function, e.g. for an entry point.
BC->registerNameAtAddress(UniqueName, Address);
if (!AlternativeName.empty())
BC->registerNameAtAddress(AlternativeName, Address);
@ -1047,8 +1048,6 @@ void RewriteInstance::discoverFileObjects() {
continue;
}
// TODO: populate address map with PLT entries for better readability.
// Checkout for conflicts with function data from FDEs.
bool IsSimple = true;
auto FDEI = CFIRdWrt->getFDEs().lower_bound(Address);
@ -1110,50 +1109,32 @@ void RewriteInstance::discoverFileObjects() {
PreviousFunction = BF;
}
// Process PLT section.
disassemblePLT();
// See if we missed any functions marked by FDE.
for (const auto &FDEI : CFIRdWrt->getFDEs()) {
const auto Address = FDEI.first;
const auto *FDE = FDEI.second;
auto *BF = getBinaryFunctionContainingAddress(Address);
const auto *BF = getBinaryFunctionAtAddress(Address);
if (!BF) {
if (opts::Verbosity >= 1) {
if (const auto *PartialBF = getBinaryFunctionContainingAddress(Address)) {
errs() << "BOLT-WARNING: FDE [0x" << Twine::utohexstr(Address) << ", 0x"
<< Twine::utohexstr(Address + FDE->getAddressRange())
<< ") has no corresponding symbol table entry\n";
}
auto Section = BC->getSectionForAddress(Address);
assert(Section && "cannot get section for address from FDE");
StringRef SectionName;
Section->getName(SectionName);
// PLT has a special FDE.
if (SectionName == ".plt") {
// Set the size to 0 to prevent PLT from being disassembled.
createBinaryFunction("__BOLT_PLT_PSEUDO" , *Section, Address, 0, false);
} else if (SectionName == ".plt.got") {
createBinaryFunction("__BOLT_PLT_GOT_PSEUDO" , *Section, Address, 0,
false);
<< ") conflicts with function " << *PartialBF << '\n';
} else {
if (opts::Verbosity >= 1) {
errs() << "BOLT-WARNING: FDE [0x" << Twine::utohexstr(Address)
<< ", 0x" << Twine::utohexstr(Address + FDE->getAddressRange())
<< ") has no corresponding symbol table entry\n";
}
auto Section = BC->getSectionForAddress(Address);
assert(Section && "cannot get section for address from FDE");
std::string FunctionName =
"__BOLT_FDE_FUNCat" + Twine::utohexstr(Address).str();
BC->registerNameAtAddress(FunctionName, Address);
createBinaryFunction(FunctionName, *Section, Address,
FDE->getAddressRange(), true);
}
} else if (BF->getAddress() != Address) {
errs() << "BOLT-WARNING: FDE [0x" << Twine::utohexstr(Address) << ", 0x"
<< Twine::utohexstr(Address + FDE->getAddressRange())
<< ") conflicts with function " << *BF << '\n';
}
}
if (PLTGOTSection.getObject()) {
// Check if we need to create a function for .plt.got. Some linkers
// (depending on the version) would mark it with FDE while others wouldn't.
if (!getBinaryFunctionContainingAddress(PLTGOTSection.getAddress(), true)) {
DEBUG(dbgs() << "BOLT-DEBUG: creating .plt.got pseudo function at 0x"
<< Twine::utohexstr(PLTGOTSection.getAddress()) << '\n');
createBinaryFunction("__BOLT_PLT_GOT_PSEUDO" , PLTGOTSection,
PLTGOTSection.getAddress(), 0, false);
}
}
@ -1180,6 +1161,81 @@ void RewriteInstance::discoverFileObjects() {
}
}
void RewriteInstance::disassemblePLT() {
if (!PLTSection.getObject())
return;
const auto PLTAddress = PLTSection.getAddress();
StringRef PLTContents;
PLTSection.getContents(PLTContents);
ArrayRef<uint8_t> PLTData(
reinterpret_cast<const uint8_t *>(PLTContents.data()),
PLTSection.getSize());
// Pseudo function for the start of PLT. The table could have a matching
// FDE that we want to match to pseudo function.
createBinaryFunction("__BOLT_PLT_PSEUDO" , PLTSection, PLTAddress, 0, false);
for (uint64_t Offset = 0; Offset < PLTSection.getSize(); Offset += 0x10) {
uint64_t InstrSize;
MCInst Instruction;
const uint64_t InstrAddr = PLTAddress + Offset;
if (!BC->DisAsm->getInstruction(Instruction,
InstrSize,
PLTData.slice(Offset),
InstrAddr,
nulls(),
nulls())) {
errs() << "BOLT-ERROR: unable to disassemble instruction in .plt "
<< "at offset 0x" << Twine::utohexstr(Offset) << '\n';
exit(1);
}
if (!BC->MIA->isIndirectBranch(Instruction))
continue;
uint64_t TargetAddress;
if (!BC->MIA->evaluateMemOperandTarget(Instruction,
TargetAddress,
InstrAddr,
InstrSize)) {
errs() << "BOLT-ERROR: error evaluating PLT instruction at offset 0x"
<< Twine::utohexstr(InstrAddr) << '\n';
exit(1);
}
// To get the name we have to read a relocation against the address.
for (const auto &Rel : RelaPLTSection.relocations()) {
if (Rel.getType() != ELF::R_X86_64_JUMP_SLOT)
continue;
if (Rel.getOffset() == TargetAddress) {
const auto SymbolIter = Rel.getSymbol();
assert(SymbolIter != InputFile->symbol_end() &&
"non-null symbol expected");
const auto SymbolName = *(*SymbolIter).getName();
std::string Name = SymbolName.str() + "@PLT";
auto *BF = createBinaryFunction(Name,
PLTSection,
InstrAddr,
0,
/*IsSimple=*/false);
auto TargetSymbol = BC->registerNameAtAddress(SymbolName.str() + "@GOT",
TargetAddress);
BF->setPLTSymbol(TargetSymbol);
break;
}
}
}
if (PLTGOTSection.getObject()) {
// Check if we need to create a function for .plt.got. Some linkers
// (depending on the version) would mark it with FDE while others wouldn't.
if (!getBinaryFunctionAtAddress(PLTGOTSection.getAddress())) {
createBinaryFunction("__BOLT_PLT_GOT_PSEUDO" , PLTGOTSection,
PLTGOTSection.getAddress(), 0, false);
}
}
}
void RewriteInstance::adjustFunctionBoundaries() {
for (auto &BFI : BinaryFunctions) {
auto &Function = BFI.second;
@ -1320,6 +1376,7 @@ BinaryFunction *RewriteInstance::createBinaryFunction(
Address, BinaryFunction(Name, Section, Address, Size, *BC, IsSimple));
assert(Result.second == true && "unexpected duplicate function");
auto *BF = &Result.first->second;
BC->registerNameAtAddress(Name, Address);
BC->SymbolToFunctionMap[BF->getSymbol()] = BF;
return BF;
}
@ -1349,8 +1406,14 @@ void RewriteInstance::readSpecialSections() {
HasTextRelocations = true;
} else if (SectionName == ".gdb_index") {
GdbIndexSection = Section;
} else if (SectionName == ".plt") {
PLTSection = Section;
} else if (SectionName == ".got.plt") {
GOTPLTSection = Section;
} else if (SectionName == ".plt.got") {
PLTGOTSection = Section;
} else if (SectionName == ".rela.plt") {
RelaPLTSection = Section;
}
// Ignore zero-size allocatable sections as they present no interest to us.
@ -1733,7 +1796,6 @@ void RewriteInstance::disassembleFunctions() {
abort();
}
if (opts::PrintAll || opts::PrintDisasm)
Function.print(outs(), "after disassembly", true);
@ -3144,7 +3206,9 @@ void RewriteInstance::patchELFSymTabs(ELFObjectFile<ELFT> *File) {
const auto *Function = getBinaryFunctionAtAddress(Symbol.st_value);
// Some section symbols may be mistakenly associated with the first
// function emitted in the section. Dismiss if it is a section symbol.
if (Function && NewSymbol.getType() != ELF::STT_SECTION) {
if (Function &&
!Function->getPLTSymbol() &&
NewSymbol.getType() != ELF::STT_SECTION) {
NewSymbol.st_value = Function->getOutputAddress();
NewSymbol.st_size = Function->getOutputSize();
NewSymbol.st_shndx = NewTextSectionIndex;
@ -3275,15 +3339,6 @@ template <typename ELFT>
void RewriteInstance::patchELFRelaPLT(ELFObjectFile<ELFT> *File) {
auto &OS = Out->os();
SectionRef RelaPLTSection;
for (const auto &Section : File->sections()) {
StringRef SectionName;
Section.getName(SectionName);
if (SectionName == ".rela.plt") {
RelaPLTSection = Section;
break;
}
}
if (!RelaPLTSection.getObject()) {
errs() << "BOLT-INFO: no .rela.plt section found\n";
return;
@ -3362,6 +3417,8 @@ void RewriteInstance::patchELFDynamic(ELFObjectFile<ELFT> *File) {
}
assert(DynamicPhdr && "missing dynamic in ELF binary");
bool ZNowSet = false;
// Go through all dynamic entries and patch functions addresses with
// new ones.
ErrorOr<const Elf_Dyn *> DTB = Obj->dynamic_table_begin(DynamicPhdr);
@ -3376,10 +3433,24 @@ void RewriteInstance::patchELFDynamic(ELFObjectFile<ELFT> *File) {
break;
case ELF::DT_INIT:
case ELF::DT_FINI:
if (auto NewAddress = getNewFunctionAddress(DE->getPtr())) {
DEBUG(dbgs() << "BOLT-DEBUG: patching dynamic entry of type "
<< DE->getTag() << '\n');
NewDE.d_un.d_ptr = NewAddress;
if (opts::Relocs) {
if (auto NewAddress = getNewFunctionAddress(DE->getPtr())) {
DEBUG(dbgs() << "BOLT-DEBUG: patching dynamic entry of type "
<< DE->getTag() << '\n');
NewDE.d_un.d_ptr = NewAddress;
}
}
break;
case ELF::DT_FLAGS:
if (BC->RequiresZNow) {
NewDE.d_un.d_val |= ELF::DF_BIND_NOW;
ZNowSet = true;
}
break;
case ELF::DT_FLAGS_1:
if (BC->RequiresZNow) {
NewDE.d_un.d_val |= ELF::DF_1_NOW;
ZNowSet = true;
}
break;
}
@ -3388,6 +3459,13 @@ void RewriteInstance::patchELFDynamic(ELFObjectFile<ELFT> *File) {
DynamicOffset + (DE - *DTB) * sizeof(*DE));
}
}
if (BC->RequiresZNow && !ZNowSet) {
errs() << "BOLT-ERROR: output binary requires immediate relocation "
"processing which depends on DT_FLAGS or DT_FLAGS_1 presence in "
".dynamic. Please re-link the binary with -znow.\n";
exit(1);
}
}
uint64_t RewriteInstance::getNewFunctionAddress(uint64_t OldAddress) {
@ -3573,10 +3651,10 @@ void RewriteInstance::rewriteFile() {
// Copy non-allocatable sections once allocatable part is finished.
rewriteNoteSections();
if (opts::Relocs) {
// Patch dynamic section/segment.
patchELFDynamic();
// Patch dynamic section/segment.
patchELFDynamic();
if (opts::Relocs) {
patchELFRelaPLT();
patchELFGOT();

View File

@ -290,6 +290,9 @@ private:
// Run ObjectLinkingLayer() with custom memory manager and symbol resolver.
orc::ObjectLinkingLayer<> OLT;
/// Disassemble and create function entries for PLT.
void disassemblePLT();
/// ELF-specific part. TODO: refactor into new class.
#define ELF_FUNCTION(FUNC) \
template <typename ELFT> void FUNC(ELFObjectFile<ELFT> *Obj); \
@ -481,12 +484,28 @@ private:
const llvm::DWARFFrame *EHFrame{nullptr};
SectionRef EHFrameSection;
/// .plt section.
SectionRef PLTSection;
/// .got.plt section.
///
/// Contains jump slots (addresses) indirectly referenced by
/// instructions in .plt section.
SectionRef GOTPLTSection;
/// .plt.got section (#clowntown).
///
/// A section sometimes generated by BFD linker.
SectionRef PLTGOTSection;
/// .rela.plt section.
///
/// Contains relocations against .got.plt.
SectionRef RelaPLTSection;
/// .gdb_index section.
SectionRef GdbIndexSection;
/// .plt.got section.
SectionRef PLTGOTSection;
uint64_t NewSymTabOffset{0};
/// Keep track of functions we fail to write in the binary. We need to avoid