//===--- BinaryPasses.h - Binary-level analysis/optimization passes -------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// The set of optimization/analysis passes that run on BinaryFunctions.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TOOLS_LLVM_BOLT_BINARY_PASSES_H
#define LLVM_TOOLS_LLVM_BOLT_BINARY_PASSES_H
#include "BinaryContext.h"
#include "BinaryFunction.h"
#include "llvm/Support/CommandLine.h"
#include <map>
#include <set>
#include <string>
#include <unordered_map>
#include <unordered_set>
namespace llvm {
namespace bolt {
/// An optimization/analysis pass that runs on functions.
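/// A minimal subclass looks like the following (illustrative sketch; the
/// pass name "MyPass" and its body are hypothetical):
///
///   class MyPass : public BinaryFunctionPass {
///   public:
///     explicit MyPass(const cl::opt<bool> &PrintPass)
///       : BinaryFunctionPass(PrintPass) { }
///     const char *getName() const override { return "my-pass"; }
///     void runOnFunctions(BinaryContext &BC,
///                         std::map<uint64_t, BinaryFunction> &BFs,
///                         std::set<uint64_t> &LargeFunctions) override {
///       for (auto &BFI : BFs)
///         if (shouldOptimize(BFI.second)) {
///           // ... transform BFI.second ...
///         }
///     }
///   };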
class BinaryFunctionPass {
const cl::opt<bool> &PrintPass;
protected:
explicit BinaryFunctionPass(const cl::opt<bool> &PrintPass)
: PrintPass(PrintPass) { }
/// Control whether a specific function should be skipped during
/// optimization.
bool shouldOptimize(const BinaryFunction &BF) const;
public:
virtual ~BinaryFunctionPass() = default;
/// The name of this pass.
virtual const char *getName() const = 0;
/// Control whether debug info is printed after this pass is completed.
bool printPass() const { return PrintPass; }
/// Control whether debug info is printed for an individual function after
/// this pass is completed (printPass() must have returned true).
virtual bool shouldPrint(const BinaryFunction &BF) const;
/// Execute this pass on the given functions.
virtual void runOnFunctions(BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs,
std::set<uint64_t> &LargeFunctions) = 0;
};
/// Detects functions that simply do a tail call when they are called and
/// optimizes calls to these functions.
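/// For example (illustrative assembly; names are hypothetical):
///
///   foo: jmp bar      # foo's body is a lone tail call to bar
///
///   call foo   ->   call bar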
class OptimizeBodylessFunctions : public BinaryFunctionPass {
private:
/// EquivalentCallTarget[F] = G ==> function F is simply a tail call to G,
/// thus calls to F can be optimized to calls to G.
std::unordered_map<const MCSymbol *, const BinaryFunction *>
EquivalentCallTarget;
void analyze(BinaryFunction &BF,
BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs);
void optimizeCalls(BinaryFunction &BF,
BinaryContext &BC);
/// Stats for eliminated calls.
uint64_t NumEliminatedCalls{0};
uint64_t NumOptimizedCallSites{0};
public:
explicit OptimizeBodylessFunctions(const cl::opt<bool> &PrintPass)
: BinaryFunctionPass(PrintPass) { }
const char *getName() const override {
return "optimize-bodyless";
}
void runOnFunctions(BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs,
std::set<uint64_t> &LargeFunctions) override;
};
/// Inlining of single basic block functions.
/// The pass currently does not handle CFI instructions, which is needed for
/// correctness, so it may break exception handling.
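/// For example (illustrative; names are hypothetical), given a callee that
/// consists of a single basic block:
///
///   inc:  addl $0x1, %eax
///         retq
///
/// a site "callq inc" is replaced with the block's instructions minus the
/// return, subject to the limits below.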
class InlineSmallFunctions : public BinaryFunctionPass {
private:
std::set<const BinaryFunction *> InliningCandidates;
/// Maximum number of instructions in an inlined function.
static const unsigned kMaxInstructions = 8;
/// Maximum code size (in bytes) of inlined function (used by aggressive
/// inlining).
static const uint64_t kMaxSize = 60;
/// Maximum number of functions that will be considered for inlining (in
/// descending hotness order).
static const unsigned kMaxFunctions = 30000;
/// Statistics collected for debugging.
uint64_t TotalDynamicCalls = 0;
uint64_t InlinedDynamicCalls = 0;
uint64_t TotalInlineableCalls = 0;
std::unordered_set<const BinaryFunction *> Modified;
static bool mustConsider(const BinaryFunction &BF);
void findInliningCandidates(BinaryContext &BC,
const std::map<uint64_t, BinaryFunction> &BFs);
/// Inline the call in CallInst to InlinedFunctionBB (the only BB of the
/// called function).
void inlineCall(BinaryContext &BC,
BinaryBasicBlock &BB,
MCInst *CallInst,
const BinaryBasicBlock &InlinedFunctionBB);
bool inlineCallsInFunction(BinaryContext &BC,
BinaryFunction &Function);
/// The following methods do a more aggressive inlining pass, where we
/// inline calls as well as tail calls and we are not limited to inlining
/// functions with only one basic block.
/// FIXME: Currently these are broken since they do not work with the split
/// function option.
void findInliningCandidatesAggressive(
BinaryContext &BC, const std::map<uint64_t, BinaryFunction> &BFs);
bool inlineCallsInFunctionAggressive(
BinaryContext &BC, BinaryFunction &Function);
/// Inline the call in CallInst to InlinedFunction. The inlined function
/// should not contain any landing pads or thrower edges but may have more
/// than one basic block.
///
/// Return the location (basic block and instruction index) where the code of
/// the caller function continues after the inlined code.
std::pair<BinaryBasicBlock *, unsigned>
inlineCall(BinaryContext &BC,
BinaryFunction &CallerFunction,
BinaryBasicBlock *CallerBB,
const unsigned CallInstIndex,
const BinaryFunction &InlinedFunction);
public:
explicit InlineSmallFunctions(const cl::opt<bool> &PrintPass)
: BinaryFunctionPass(PrintPass) { }
const char *getName() const override {
return "inlining";
}
bool shouldPrint(const BinaryFunction &BF) const override {
return BinaryFunctionPass::shouldPrint(BF) && Modified.count(&BF) > 0;
}
void runOnFunctions(BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs,
std::set<uint64_t> &LargeFunctions) override;
};
/// Detect and eliminate unreachable basic blocks. Such blocks are typically
/// filled with nops and used for alignment.
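/// For example (illustrative):
///
///   .L1:  jmp .L2
///         nop          # no predecessors: alignment padding, deleted
///   .L2:  ...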
class EliminateUnreachableBlocks : public BinaryFunctionPass {
std::unordered_set<const BinaryFunction *> Modified;
unsigned DeletedBlocks{0};
uint64_t DeletedBytes{0};
void runOnFunction(BinaryFunction &Function);
public:
explicit EliminateUnreachableBlocks(const cl::opt<bool> &PrintPass)
: BinaryFunctionPass(PrintPass) { }
const char *getName() const override {
return "eliminate-unreachable";
}
bool shouldPrint(const BinaryFunction &BF) const override {
return BinaryFunctionPass::shouldPrint(BF) && Modified.count(&BF) > 0;
}
void runOnFunctions(BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs,
std::set<uint64_t> &LargeFunctions) override;
};
/// Reorder the basic blocks for each function based on hotness.
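/// For example (illustrative; execution counts are hypothetical):
///
///   original layout:        reordered layout:
///     A (count: 1000)         A (count: 1000)
///     B (count: 1)            C (count: 999)
///     C (count: 999)          B (count: 1)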
class ReorderBasicBlocks : public BinaryFunctionPass {
public:
explicit ReorderBasicBlocks(const cl::opt<bool> &PrintPass)
: BinaryFunctionPass(PrintPass) { }
const char *getName() const override {
return "reordering";
}
bool shouldPrint(const BinaryFunction &BF) const override;
void runOnFunctions(BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs,
std::set<uint64_t> &LargeFunctions) override;
};
/// Sync local branches with CFG.
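/// For example (illustrative): if reordering makes the taken target of a
/// conditional branch the new fall-through block, the branch is inverted:
///
///   before reordering:      after reordering:
///     jcc  L1                 jncc L2     # L1 now falls through
///     L2:  ...                L1:  ...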
class FixupBranches : public BinaryFunctionPass {
public:
explicit FixupBranches(const cl::opt<bool> &PrintPass)
: BinaryFunctionPass(PrintPass) { }
const char *getName() const override {
return "fix-branches";
}
void runOnFunctions(BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs,
std::set<uint64_t> &LargeFunctions) override;
};
/// Fix the CFI state and exception handling information after all other
/// passes have completed.
class FixupFunctions : public BinaryFunctionPass {
public:
explicit FixupFunctions(const cl::opt<bool> &PrintPass)
: BinaryFunctionPass(PrintPass) { }
const char *getName() const override {
return "fixup-functions";
}
void runOnFunctions(BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs,
std::set<uint64_t> &LargeFunctions) override;
};
/// An optimization to simplify conditional tail calls by removing
/// unnecessary branches.
///
/// This optimization considers both of the following cases:
///
/// foo: ...
/// jcc L1 original
/// ...
/// L1: jmp bar # TAILJMP
///
/// ->
///
/// foo: ...
/// jcc bar iff jcc L1 is expected
/// ...
///
/// L1 is unreachable
///
/// OR
///
/// foo: ...
/// jcc L2
/// L1: jmp dest # TAILJMP
/// L2: ...
///
/// ->
///
/// foo: jncc dest # TAILJMP
/// L2: ...
///
/// L1 is unreachable
///
/// For this particular case, the first basic block ends with
/// a conditional branch and has two successors, one fall-through
/// and one for when the condition is true.
/// The target of the conditional is a basic block with a single
/// unconditional branch (i.e. tail call) to another function.
/// We don't care about the contents of the fall-through block.
/// We assume that the target of the conditional branch is the
/// first successor.
class SimplifyConditionalTailCalls : public BinaryFunctionPass {
uint64_t NumCandidateTailCalls{0};
uint64_t NumTailCallsPatched{0};
uint64_t NumOrigForwardBranches{0};
uint64_t NumOrigBackwardBranches{0};
std::unordered_set<const BinaryFunction *> Modified;
bool shouldRewriteBranch(const BinaryBasicBlock *PredBB,
const MCInst &CondBranch,
const BinaryBasicBlock *BB,
const bool DirectionFlag);
uint64_t fixTailCalls(BinaryContext &BC, BinaryFunction &BF);
public:
explicit SimplifyConditionalTailCalls(const cl::opt<bool> &PrintPass)
: BinaryFunctionPass(PrintPass) { }
const char *getName() const override {
return "simplify-conditional-tail-calls";
}
bool shouldPrint(const BinaryFunction &BF) const override {
return BinaryFunctionPass::shouldPrint(BF) && Modified.count(&BF) > 0;
}
void runOnFunctions(BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs,
std::set<uint64_t> &LargeFunctions) override;
};
/// Perform simple peephole optimizations.
class Peepholes : public BinaryFunctionPass {
uint64_t NumDoubleJumps{0};
uint64_t TailCallTraps{0};
/// Attempt to use the minimum operand width for arithmetic, branch and
/// move instructions.
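/// For example (illustrative x86 encoding): a 32-bit immediate that fits in
/// a signed 8-bit field can use the shorter form:
///
///   48 81 c0 01 00 00 00   addq $0x1, %rax
///     ->
///   48 83 c0 01            addq $0x1, %rax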
void shortenInstructions(BinaryContext &BC, BinaryFunction &Function);
/// Replace double jumps with a jump directly to the target, i.e.
/// jmp/jcc L1; L1: jmp L2 -> jmp/jcc L2.
void fixDoubleJumps(BinaryContext &BC, BinaryFunction &Function);
/// Add trap instructions immediately after indirect tail calls to prevent
/// the processor from decoding instructions immediately following the
/// tailcall.
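/// For example (illustrative; on x86 the trap could be ud2):
///
///   jmpq *%rax               jmpq *%rax
///   <next function>   ->     ud2           # tail calls never fall through
///                            <next function>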
void addTailcallTraps(BinaryContext &BC, BinaryFunction &Function);
public:
explicit Peepholes(const cl::opt<bool> &PrintPass)
: BinaryFunctionPass(PrintPass) { }
const char *getName() const override {
return "peepholes";
}
void runOnFunctions(BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs,
std::set<uint64_t> &LargeFunctions) override;
};
/// An optimization to simplify loads from read-only sections. The pass converts
/// load instructions with statically computed target address such as:
///
/// mov 0x12f(%rip), %eax
///
/// to their counterparts that use immediate operands instead of memory loads:
///
/// mov $0x4007dc, %eax
///
/// when the target address points somewhere inside a read-only section.
///
class SimplifyRODataLoads : public BinaryFunctionPass {
uint64_t NumLoadsSimplified{0};
uint64_t NumDynamicLoadsSimplified{0};
uint64_t NumLoadsFound{0};
uint64_t NumDynamicLoadsFound{0};
std::unordered_set<const BinaryFunction *> Modified;
bool simplifyRODataLoads(BinaryContext &BC, BinaryFunction &BF);
public:
explicit SimplifyRODataLoads(const cl::opt<bool> &PrintPass)
: BinaryFunctionPass(PrintPass) { }
const char *getName() const override {
return "simplify-read-only-loads";
}
bool shouldPrint(const BinaryFunction &BF) const override {
return BinaryFunctionPass::shouldPrint(BF) && Modified.count(&BF) > 0;
}
void runOnFunctions(BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs,
std::set<uint64_t> &LargeFunctions) override;
};
/// An optimization that replaces references to identical functions with
/// references to a single one of them.
///
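/// For example (illustrative; names are hypothetical): if bar and baz
/// assemble to byte-identical bodies, references are redirected to one of
/// them:
///
///   callq baz   ->   callq bar
///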
class IdenticalCodeFolding : public BinaryFunctionPass {
public:
explicit IdenticalCodeFolding(const cl::opt<bool> &PrintPass)
: BinaryFunctionPass(PrintPass) { }
const char *getName() const override {
return "identical-code-folding";
}
void runOnFunctions(BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs,
std::set<uint64_t> &LargeFunctions) override;
};
///
/// Prints a list of the top 100 functions sorted by a set of
/// dyno stats categories.
///
class PrintSortedBy : public BinaryFunctionPass {
public:
explicit PrintSortedBy(const cl::opt<bool> &PrintPass)
: BinaryFunctionPass(PrintPass) { }
const char *getName() const override {
return "print-sorted-by";
}
bool shouldPrint(const BinaryFunction &) const override {
return false;
}
void runOnFunctions(BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs,
std::set<uint64_t> &LargeFunctions) override;
};
/// Optimize indirect calls.
/// The indirect call promotion pass visits each indirect call and
/// examines the BranchData for each. If the most frequent targets
/// from that callsite exceed the specified threshold (default 90%),
/// the call is promoted. Otherwise, it is ignored. By default,
/// only one target is considered at each callsite.
///
/// When a candidate callsite is processed, we modify the callsite
/// to test for the most common call targets before calling through
/// the original generic call mechanism.
///
/// The CFG and layout are modified by ICP.
///
/// A few new command line options have been added:
/// -indirect-call-promotion
/// -indirect-call-promotion-threshold=<percentage>
/// -indirect-call-promotion-mispredict-threshold=<percentage>
/// -indirect-call-promotion-topn=<int>
///
/// The threshold is the minimum frequency of a call target needed
/// before ICP is triggered.
///
/// The mispredict threshold is used to disable the optimization at
/// any callsite where the branch predictor does a good enough job
/// that ICP wouldn't help regardless of the frequency of the most
/// common target.
///
/// The topn option controls the number of targets to consider for
/// each callsite, e.g. ICP is triggered if topn=2 and the total
/// frequency of the top two call targets exceeds the threshold.
///
/// The minimize code size option controls whether or not the hot
/// calls are to registers (callq %r10) or to function addresses
/// (callq $foo).
///
/// Example of ICP:
///
/// C++ code:
///
/// int B_count = 0;
/// int C_count = 0;
///
/// struct A { virtual void foo() = 0; };
/// struct B : public A { virtual void foo() { ++B_count; }; };
/// struct C : public A { virtual void foo() { ++C_count; }; };
///
/// A* a = ...
/// a->foo();
/// ...
///
/// original assembly:
///
/// B0: 49 8b 07 mov (%r15),%rax
/// 4c 89 ff mov %r15,%rdi
/// ff 10 callq *(%rax)
/// 41 83 e6 01 and $0x1,%r14d
/// 4d 89 e6 mov %r12,%r14
/// 4c 0f 44 f5 cmove %rbp,%r14
/// 4c 89 f7 mov %r14,%rdi
/// ...
///
/// after ICP:
///
/// B0: 49 8b 07 mov (%r15),%rax
/// 4c 89 ff mov %r15,%rdi
/// 48 81 38 e0 0b 40 00 cmpq $B::foo,(%rax)
/// 75 29 jne B3
/// B1: e8 45 03 00 00 callq $B::foo
/// B2: 41 83 e6 01 and $0x1,%r14d
/// 4d 89 e6 mov %r12,%r14
/// 4c 0f 44 f5 cmove %rbp,%r14
/// 4c 89 f7 mov %r14,%rdi
/// ...
///
/// B3: ff 10 callq *(%rax)
/// eb d6 jmp B2
///
class IndirectCallPromotion : public BinaryFunctionPass {
using BasicBlocksVector = std::vector<std::unique_ptr<BinaryBasicBlock>>;
std::unordered_set<const BinaryFunction *> Modified;
// Total number of calls from all callsites.
uint64_t TotalCalls{0};
// Total number of indirect calls from all callsites.
// (a fraction of TotalCalls)
uint64_t TotalIndirectCalls{0};
// Total number of callsites that use indirect calls.
// (the total number of callsites is not recorded)
uint64_t TotalIndirectCallsites{0};
// Total number of indirect callsites that are optimized by ICP.
// (a fraction of TotalIndirectCallsites)
uint64_t TotalOptimizedIndirectCallsites{0};
// Total number of indirect calls that are optimized by ICP.
// (a fraction of TotalCalls)
uint64_t TotalNumFrequentCalls{0};
std::vector<BranchInfo> getCallTargets(BinaryContext &BC,
const FuncBranchData &BranchData,
const MCInst &Inst) const;
size_t canPromoteCallsite(const BinaryBasicBlock *BB,
const MCInst &Inst,
const std::vector<BranchInfo> &Targets,
uint64_t NumCalls);
void printCallsiteInfo(const BinaryBasicBlock *BB,
const MCInst &Inst,
const std::vector<BranchInfo> &Targets,
const size_t N,
uint64_t NumCalls) const;
std::vector<std::pair<MCSymbol *, uint64_t>>
findCallTargetSymbols(BinaryContext &BC,
const std::vector<BranchInfo> &Targets,
const size_t N) const;
std::vector<std::unique_ptr<BinaryBasicBlock>>
rewriteCall(BinaryContext &BC,
BinaryFunction &Function,
BinaryBasicBlock *IndCallBlock,
const MCInst &CallInst,
MCInstrAnalysis::ICPdata &&ICPcode) const;
BinaryBasicBlock *fixCFG(BinaryContext &BC,
BinaryFunction &Function,
BinaryBasicBlock *IndCallBlock,
const bool IsTailCall,
BasicBlocksVector &&NewBBs,
const std::vector<BranchInfo> &Targets) const;
public:
explicit IndirectCallPromotion(const cl::opt<bool> &PrintPass)
: BinaryFunctionPass(PrintPass) { }
const char *getName() const override {
return "indirect-call-promotion";
}
bool shouldPrint(const BinaryFunction &BF) const override {
return BinaryFunctionPass::shouldPrint(BF) && Modified.count(&BF) > 0;
}
void runOnFunctions(BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs,
std::set<uint64_t> &LargeFunctions) override;
};
/// Pass for lowering any instructions that we have raised and that have
/// to be lowered.
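/// For example (illustrative): an instruction raised into an annotated
/// pseudo form for analysis, such as a conditional tail call, is converted
/// back to a plain machine branch here before emission.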
class InstructionLowering : public BinaryFunctionPass {
public:
explicit InstructionLowering(const cl::opt<bool> &PrintPass)
: BinaryFunctionPass(PrintPass) {}
const char *getName() const override {
return "inst-lowering";
}
void runOnFunctions(BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs,
std::set<uint64_t> &LargeFunctions) override;
};
} // namespace bolt
} // namespace llvm
#endif // LLVM_TOOLS_LLVM_BOLT_BINARY_PASSES_H