More aggressive inlining pass

Summary:
This adds a more aggressive inlining pass that can inline tail calls and
functions with more than one basic block.

(cherry picked from FBD3677856)
This commit is contained in:
Theodoros Kasampalis 2016-07-29 14:17:06 -07:00 committed by Maksim Panchenko
parent 82d76ae18b
commit 32739247eb
6 changed files with 468 additions and 21 deletions

View File

@ -412,23 +412,25 @@ public:
/// Replace an instruction with a sequence of instructions. Returns true
/// if the instruction to be replaced was found and replaced.
bool replaceInstruction(MCInst *Inst,
const std::vector<MCInst> &Replacement) {
// Iterator-range form: the replacement sequence is given by [Begin, End).
template <typename Itr>
bool replaceInstruction(MCInst *Inst, Itr Begin, Itr End) {
// Walk the block's instructions from the back towards the front, looking for
// the element whose address is Inst.
auto I = Instructions.end();
auto B = Instructions.begin();
while (I > B) {
--I;
if (&*I == Inst) {
Instructions.insert(
Instructions.erase(I),
Replacement.begin(),
Replacement.end());
// Erase the matched instruction and insert the replacement sequence at the
// iterator returned by erase().
Instructions.insert(Instructions.erase(I), Begin, End);
return true;
}
}
// Inst does not belong to this block.
return false;
}
/// Convenience overload: replace \p Inst with the contents of \p Replacement
/// by forwarding to the iterator-range overload. Returns true if the
/// instruction to be replaced was found and replaced.
bool replaceInstruction(MCInst *Inst,
const std::vector<MCInst> &Replacement) {
return replaceInstruction(Inst, Replacement.begin(), Replacement.end());
}
/// Split apart the instructions in this basic block starting at Inst.
/// The instructions following Inst are removed and returned in a vector.
std::vector<MCInst> splitInstructions(const MCInst *Inst) {

View File

@ -550,13 +550,14 @@ void BinaryFunction::clearLandingPads(const unsigned StartIndex,
void BinaryFunction::addLandingPads(const unsigned StartIndex,
const unsigned NumBlocks) {
for (auto I = StartIndex; I < StartIndex + NumBlocks; ++I) {
auto *BB = BasicBlocks[I];
for (auto *BB : BasicBlocks) {
if (LandingPads.find(BB->getLabel()) != LandingPads.end()) {
MCSymbol *LP = BB->getLabel();
for (unsigned I : LPToBBIndex.at(LP)) {
for (unsigned I : LPToBBIndex[LP]) {
assert(I < BasicBlocks.size());
BinaryBasicBlock *ThrowBB = BasicBlocks[I];
const unsigned ThrowBBIndex = getIndex(ThrowBB);
if (ThrowBBIndex >= StartIndex && ThrowBBIndex < StartIndex + NumBlocks)
ThrowBB->addLandingPad(BB);
}
}

View File

@ -505,6 +505,24 @@ public:
return BasicBlocks.at(Index);
}
/// Returns the basic block that immediately follows \p BB in the layout, or
/// nullptr if \p BB is the last block of the layout (or is not part of it).
const BinaryBasicBlock *getBasicBlockAfter(const BinaryBasicBlock *BB) const {
  const auto LayoutEnd = layout_end();
  for (auto Pos = layout_begin(); Pos != LayoutEnd; ++Pos) {
    if (*Pos != BB)
      continue;
    const auto Following = std::next(Pos);
    if (Following != LayoutEnd)
      return *Following;
  }
  return nullptr;
}
/// Mutable variant: returns the basic block that immediately follows \p BB in
/// the layout, or nullptr if \p BB is the last block of the layout (or is not
/// part of it).
BinaryBasicBlock *getBasicBlockAfter(const BinaryBasicBlock *BB) {
  const auto LayoutEnd = layout_end();
  for (auto Pos = layout_begin(); Pos != LayoutEnd; ++Pos) {
    if (*Pos != BB)
      continue;
    const auto Following = std::next(Pos);
    if (Following != LayoutEnd)
      return *Following;
  }
  return nullptr;
}
/// Return the name of the function as extracted from the binary file.
/// If the function has multiple names - return the last one
/// followed by "(*#<numnames>)".

View File

@ -82,6 +82,9 @@ void BinaryFunctionPassManager::runAllPasses(
Manager.registerPass(llvm::make_unique<IdenticalCodeFolding>(),
opts::IdenticalCodeFolding);
Manager.registerPass(llvm::make_unique<InlineSmallFunctions>(),
opts::InlineSmallFunctions);
Manager.registerPass(
std::move(llvm::make_unique<EliminateUnreachableBlocks>(Manager.NagUser)),
opts::EliminateUnreachable);
@ -103,9 +106,6 @@ void BinaryFunctionPassManager::runAllPasses(
Manager.registerPass(llvm::make_unique<OptimizeBodylessFunctions>(),
opts::OptimizeBodylessFunctions);
Manager.registerPass(llvm::make_unique<InlineSmallFunctions>(),
opts::InlineSmallFunctions);
Manager.registerPass(std::move(llvm::make_unique<FixupFunctions>()));
Manager.registerPass(llvm::make_unique<Peepholes>(), opts::Peepholes);

View File

@ -172,6 +172,47 @@ void InlineSmallFunctions::findInliningCandidates(
<< " inlineable functions.\n");
}
/// Collect the names of the functions that the aggressive inliner is allowed
/// to inline and store them in InliningCandidates.
///
/// A function qualifies when it is simple, selected for processing, not in
/// the hard-coded exclusion list below, has no EH ranges, its total code size
/// over all layout blocks does not exceed kMaxSize, and none of its
/// instructions is an EH label or a CFI instruction.
void InlineSmallFunctions::findInliningCandidatesAggressive(
    BinaryContext &BC,
    const std::map<uint64_t, BinaryFunction> &BFs) {
  // Functions that must never be inlined, identified by mangled name.
  // NOTE(review): judging by the variable name these are presumably
  // overwritten/patched at runtime, so inlined copies would go stale --
  // confirm.
  const std::set<std::string> OverwrittenFunctions = {
    "_ZN4HPHP13hash_string_iEPKcj",
    "_ZN4HPHP21hash_string_cs_unsafeEPKcj",
    "_ZN4HPHP14hash_string_csEPKcj",
    "_ZN4HPHP20hash_string_i_unsafeEPKcj",
    "_ZNK4HPHP10StringData10hashHelperEv"
  };

  for (const auto &BFIt : BFs) {
    const auto &Function = BFIt.second;
    if (!Function.isSimple() ||
        !opts::shouldProcess(Function) ||
        OverwrittenFunctions.count(Function.getName()) ||
        Function.hasEHRanges())
      continue;

    // Total code size of the function, summed over all blocks in the layout.
    uint64_t FunctionSize = 0;
    for (const auto *BB : Function.layout()) {
      FunctionSize += BC.computeCodeSize(BB->begin(), BB->end());
    }
    assert(FunctionSize > 0 && "found empty function");
    if (FunctionSize > kMaxSize)
      continue;

    // Reject functions containing EH labels or CFI instructions. Stop
    // scanning as soon as one is found: a `break` in the inner loop alone
    // would still visit every remaining block.
    bool FoundCFI = false;
    for (const auto *BB : Function.layout()) {
      for (const auto &Inst : *BB) {
        if (BC.MIA->isEHLabel(Inst) || BC.MIA->isCFI(Inst)) {
          FoundCFI = true;
          break;
        }
      }
      if (FoundCFI)
        break;
    }
    if (!FoundCFI)
      InliningCandidates.insert(Function.getName());
  }

  DEBUG(errs() << "BOLT-DEBUG: " << InliningCandidates.size()
               << " inlineable functions.\n");
}
namespace {
/// Returns whether a function creates a stack frame for itself or not.
@ -225,7 +266,275 @@ void InlineSmallFunctions::inlineCall(
BB.replaceInstruction(CallInst, InlinedInstance);
}
void InlineSmallFunctions::inlineCallsInFunction(
/// Inline the call located at index \p CallInstIndex of \p CallerBB (a block
/// of \p CallerFunction) with a copy of the body of \p InlinedFunction, which
/// may consist of several basic blocks.
///
/// Returns the point (basic block and instruction index) at which the code of
/// the caller continues after the inlined code. The returned block is nullptr
/// when the inlined code ends the caller block and no block follows it in the
/// layout.
std::pair<BinaryBasicBlock *, unsigned>
InlineSmallFunctions::inlineCall(
    BinaryContext &BC,
    BinaryFunction &CallerFunction,
    BinaryBasicBlock *CallerBB,
    const unsigned CallInstIndex,
    const BinaryFunction &InlinedFunction) {
  // Get the instruction to be replaced with inlined code.
  MCInst &CallInst = CallerBB->getInstructionAtIndex(CallInstIndex);
  assert(BC.MIA->isCall(CallInst) && "Can only inline a call.");

  // Point in the function after the inlined code.
  BinaryBasicBlock *AfterInlinedBB = nullptr;
  unsigned AfterInlinedIstrIndex = 0;

  // In case of a tail call we should not remove any ret instructions from the
  // inlined instance.
  bool IsTailCall = BC.MIA->isTailCall(CallInst);

  // The first block of the function to be inlined can be merged with the caller
  // basic block. This cannot happen if there are jumps to the first block.
  bool CanMergeFirstInlinedBlock = (*InlinedFunction.begin()).pred_size() == 0;

  // If the call to be inlined is not at the end of its basic block and we have
  // to inline more than one basic blocks (or even just one basic block that
  // cannot be merged into the caller block), then the caller's basic block
  // should be split.
  bool ShouldSplitCallerBB =
      CallInstIndex < CallerBB->size() - 1 &&
      (InlinedFunction.size() > 1 || !CanMergeFirstInlinedBlock);

  // Copy inlined function's basic blocks into a vector of basic blocks that
  // will be inserted in the caller function (the inlined instance). Also, we
  // keep a mapping from basic block index to the corresponding block in the
  // inlined instance.
  std::vector<std::unique_ptr<BinaryBasicBlock>> InlinedInstance;
  std::vector<BinaryBasicBlock *>
      BBIndexToInlinedInstanceBB(InlinedFunction.size(), nullptr);
  for (const auto InlinedFunctionBB : InlinedFunction.layout()) {
    InlinedInstance.emplace_back(CallerFunction.createBasicBlock(0));
    BBIndexToInlinedInstanceBB[InlinedFunction.getIndex(InlinedFunctionBB)] =
        InlinedInstance.back().get();
    if (InlinedFunction.hasValidProfile())
      InlinedInstance.back()->setExecutionCount(
          InlinedFunctionBB->getExecutionCount());
  }
  if (ShouldSplitCallerBB) {
    // Add one extra block at the inlined instance for the removed part of the
    // caller block.
    InlinedInstance.emplace_back(CallerFunction.createBasicBlock(0));
    BBIndexToInlinedInstanceBB.push_back(InlinedInstance.back().get());
    if (CallerFunction.hasValidProfile())
      InlinedInstance.back()->setExecutionCount(CallerBB->getExecutionCount());
  }

  // Copy instructions to the basic blocks of the inlined instance.
  unsigned InlinedInstanceBBIndex = 0;
  for (const auto InlinedFunctionBB : InlinedFunction.layout()) {
    // Get the corresponding block of the inlined instance.
    auto *InlinedInstanceBB = InlinedInstance[InlinedInstanceBBIndex].get();
    assert(InlinedInstanceBB ==
           BBIndexToInlinedInstanceBB[InlinedFunction.getIndex(InlinedFunctionBB)]);

    bool IsExitingBlock = false;

    // Copy instructions into the inlined instance. Each instruction is taken
    // by value because the copy may be rewritten before it is added.
    for (auto Instruction : *InlinedFunctionBB) {
      if (!IsTailCall &&
          BC.MIA->isReturn(Instruction) &&
          !BC.MIA->isTailCall(Instruction)) {
        // Skip returns when the caller does a normal call as opposed to a tail
        // call.
        IsExitingBlock = true;
        continue;
      }
      if (!IsTailCall &&
          BC.MIA->isTailCall(Instruction)) {
        // Convert tail calls to normal calls when the caller does a normal
        // call.
        const bool Converted = BC.MIA->convertTailCallToCall(Instruction);
        assert(Converted && "unexpected tail call opcode found");
        (void)Converted;
        IsExitingBlock = true;
      }
      if (BC.MIA->isBranch(Instruction) &&
          !BC.MIA->isIndirectBranch(Instruction)) {
        // Convert the branch targets in the branch instructions that will be
        // added to the inlined instance.
        const MCSymbol *OldTargetLabel = nullptr;
        const MCSymbol *OldFTLabel = nullptr;
        MCInst *CondBranch = nullptr;
        MCInst *UncondBranch = nullptr;
        // The analysis must run in every build mode: calling it inside
        // assert() would drop the call when NDEBUG is defined and leave
        // OldTargetLabel null.
        const bool Analyzed =
            BC.MIA->analyzeBranch(Instruction, OldTargetLabel, OldFTLabel,
                                  CondBranch, UncondBranch);
        assert(Analyzed && "failed to analyze branch");
        (void)Analyzed;
        assert(OldTargetLabel);
        const MCSymbol *NewTargetLabel = nullptr;
        for (const auto SuccBB : InlinedFunctionBB->successors()) {
          if (SuccBB->getLabel() == OldTargetLabel) {
            const auto InlinedInstanceSuccBB =
                BBIndexToInlinedInstanceBB[InlinedFunction.getIndex(SuccBB)];
            NewTargetLabel = InlinedInstanceSuccBB->getLabel();
            break;
          }
        }
        assert(NewTargetLabel);
        BC.MIA->replaceBranchTarget(Instruction, NewTargetLabel, BC.Ctx.get());
      }
      // TODO: Currently we simply ignore CFI instructions but we need to
      // address them for correctness.
      if (!BC.MIA->isEHLabel(Instruction) &&
          !BC.MIA->isCFI(Instruction)) {
        InlinedInstanceBB->addInstruction(std::move(Instruction));
      }
    }

    // Add CFG edges to the basic blocks of the inlined instance.
    std::vector<BinaryBasicBlock *>
        Successors(InlinedFunctionBB->succ_size(), nullptr);
    std::transform(
        InlinedFunctionBB->succ_begin(),
        InlinedFunctionBB->succ_end(),
        Successors.begin(),
        [&InlinedFunction, &BBIndexToInlinedInstanceBB]
        (const BinaryBasicBlock *BB) {
          return BBIndexToInlinedInstanceBB[InlinedFunction.getIndex(BB)];
        });
    if (InlinedFunction.hasValidProfile()) {
      InlinedInstanceBB->addSuccessors(
          Successors.begin(),
          Successors.end(),
          InlinedFunctionBB->branch_info_begin(),
          InlinedFunctionBB->branch_info_end());
    } else {
      InlinedInstanceBB->addSuccessors(
          Successors.begin(),
          Successors.end());
    }

    if (IsExitingBlock) {
      assert(Successors.size() == 0);
      if (ShouldSplitCallerBB) {
        // Exiting blocks jump to the block holding the tail of the split
        // caller block.
        if (InlinedFunction.hasValidProfile()) {
          InlinedInstanceBB->addSuccessor(
              InlinedInstance.back().get(),
              InlinedInstanceBB->getExecutionCount());
        } else {
          InlinedInstanceBB->addSuccessor(InlinedInstance.back().get());
        }
        MCInst ExitBranchInst;
        const MCSymbol *ExitLabel = InlinedInstance.back().get()->getLabel();
        BC.MIA->createUncondBranch(ExitBranchInst, ExitLabel, BC.Ctx.get());
        InlinedInstanceBB->addInstruction(std::move(ExitBranchInst));
      } else if (InlinedInstanceBBIndex > 0 || !CanMergeFirstInlinedBlock) {
        // The call was the last instruction of the caller block: exiting
        // blocks jump to the caller block's single successor, if any.
        assert(CallInstIndex == CallerBB->size() - 1);
        assert(CallerBB->succ_size() <= 1);
        if (CallerBB->succ_size() == 1) {
          if (InlinedFunction.hasValidProfile()) {
            InlinedInstanceBB->addSuccessor(
                *CallerBB->succ_begin(),
                InlinedInstanceBB->getExecutionCount());
          } else {
            InlinedInstanceBB->addSuccessor(*CallerBB->succ_begin());
          }
          MCInst ExitBranchInst;
          const MCSymbol *ExitLabel = (*CallerBB->succ_begin())->getLabel();
          BC.MIA->createUncondBranch(ExitBranchInst, ExitLabel, BC.Ctx.get());
          InlinedInstanceBB->addInstruction(std::move(ExitBranchInst));
        }
      }
    }

    ++InlinedInstanceBBIndex;
  }

  if (ShouldSplitCallerBB) {
    // Split the basic block that contains the call and add the removed
    // instructions in the last block of the inlined instance.
    // (Is it OK to have a basic block with just CFI instructions?)
    std::vector<MCInst> TrailInstructions =
        CallerBB->splitInstructions(&CallInst);
    assert(TrailInstructions.size() > 0);
    InlinedInstance.back()->addInstructions(
        TrailInstructions.begin(),
        TrailInstructions.end());
    // Add CFG edges for the block with the removed instructions.
    if (CallerFunction.hasValidProfile()) {
      InlinedInstance.back()->addSuccessors(
          CallerBB->succ_begin(),
          CallerBB->succ_end(),
          CallerBB->branch_info_begin(),
          CallerBB->branch_info_end());
    } else {
      InlinedInstance.back()->addSuccessors(
          CallerBB->succ_begin(),
          CallerBB->succ_end());
    }
    // Update the after-inlined point.
    AfterInlinedBB = InlinedInstance.back().get();
    AfterInlinedIstrIndex = 0;
  }

  assert(InlinedInstance.size() > 0 && "found function with no basic blocks");
  assert(InlinedInstance.front()->size() > 0 &&
         "found function with empty basic block");

  // If the inlining cannot happen as a simple instruction insertion into
  // CallerBB, we remove the outgoing CFG edges of the caller block.
  if (InlinedInstance.size() > 1 || !CanMergeFirstInlinedBlock) {
    CallerBB->removeSuccessors(CallerBB->succ_begin(), CallerBB->succ_end());
    if (!ShouldSplitCallerBB) {
      // Update the after-inlined point.
      AfterInlinedBB = CallerFunction.getBasicBlockAfter(CallerBB);
      AfterInlinedIstrIndex = 0;
    }
  } else {
    assert(!ShouldSplitCallerBB);
    // Update the after-inlined point.
    if (CallInstIndex < CallerBB->size() - 1) {
      AfterInlinedBB = CallerBB;
      AfterInlinedIstrIndex =
          CallInstIndex + InlinedInstance.front()->size();
    } else {
      AfterInlinedBB = CallerFunction.getBasicBlockAfter(CallerBB);
      AfterInlinedIstrIndex = 0;
    }
  }

  // Do the inlining by merging the first block of the inlined instance into
  // the caller basic block if possible and adding the rest of the inlined
  // instance basic blocks in the caller function.
  if (CanMergeFirstInlinedBlock) {
    CallerBB->replaceInstruction(
        &CallInst,
        InlinedInstance.front()->begin(),
        InlinedInstance.front()->end());
    if (InlinedInstance.size() > 1) {
      // The caller block takes over the outgoing edges of the merged block.
      auto FirstBB = InlinedInstance.begin()->get();
      if (InlinedFunction.hasValidProfile()) {
        CallerBB->addSuccessors(
            FirstBB->succ_begin(),
            FirstBB->succ_end(),
            FirstBB->branch_info_begin(),
            FirstBB->branch_info_end());
      } else {
        CallerBB->addSuccessors(
            FirstBB->succ_begin(),
            FirstBB->succ_end());
      }
      FirstBB->removeSuccessors(FirstBB->succ_begin(), FirstBB->succ_end());
    }
    InlinedInstance.erase(InlinedInstance.begin());
  } else {
    CallerBB->eraseInstruction(&CallInst);
    if (CallerFunction.hasValidProfile()) {
      CallerBB->addSuccessor(InlinedInstance.front().get(),
                             CallerBB->getExecutionCount());
    } else {
      // Without a valid profile there is no meaningful count to attach to
      // the edge, matching the other no-profile paths in this function.
      CallerBB->addSuccessor(InlinedInstance.front().get());
    }
  }

  // Capture the count before InlinedInstance is moved from.
  unsigned NumBlocksToAdd = InlinedInstance.size();
  CallerFunction.insertBasicBlocks(CallerBB, std::move(InlinedInstance));
  CallerFunction.updateLayout(CallerBB, NumBlocksToAdd);
  CallerFunction.fixBranches();

  return std::make_pair(AfterInlinedBB, AfterInlinedIstrIndex);
}
bool InlineSmallFunctions::inlineCallsInFunction(
BinaryContext &BC,
BinaryFunction &Function) {
std::vector<BinaryBasicBlock *> Blocks(Function.layout().begin(),
@ -245,6 +554,8 @@ void InlineSmallFunctions::inlineCallsInFunction(
}
}
bool DidInlining = false;
for (auto BB : Blocks) {
if (BB->isCold())
continue;
@ -272,6 +583,7 @@ void InlineSmallFunctions::inlineCallsInFunction(
+ Function.estimateHotSize() < Function.getMaxSize()) {
auto NextInstIt = std::next(InstIt);
inlineCall(BC, *BB, &Inst, *TargetFunction.begin());
DidInlining = true;
DEBUG(errs() << "BOLT-DEBUG: Inlining call to "
<< TargetFunction.getName() << " in "
<< Function.getName() << "\n");
@ -286,6 +598,81 @@ void InlineSmallFunctions::inlineCallsInFunction(
++InstIt;
}
}
return DidInlining;
}
/// Aggressive variant of inlineCallsInFunction: walks the blocks of
/// \p Function from hottest to coldest and inlines direct calls to functions
/// listed in InliningCandidates, as long as the estimated hot size of the
/// caller plus the code inlined so far stays below the function's maximum
/// size. Also updates the inlining statistics counters.
///
/// Returns true if at least one call was inlined.
bool InlineSmallFunctions::inlineCallsInFunctionAggressive(
    BinaryContext &BC,
    BinaryFunction &Function) {
  // Work on a snapshot of the layout, ordered by decreasing execution count.
  std::vector<BinaryBasicBlock *> HotFirst(Function.layout().begin(),
                                           Function.layout().end());
  std::sort(HotFirst.begin(), HotFirst.end(),
            [](const BinaryBasicBlock *LHS, const BinaryBasicBlock *RHS) {
              return RHS->getExecutionCount() < LHS->getExecutionCount();
            });

  // Statistics: every call instruction contributes its block's count.
  for (auto *Block : HotFirst) {
    for (auto &Inst : *Block) {
      if (BC.MIA->isCall(Inst))
        totalDynamicCalls += Block->getExecutionCount();
    }
  }

  // Size of the callee code inlined into this function so far.
  uint32_t ExtraSize = 0;
  bool DidInlining = false;

  for (auto *Block : HotFirst) {
    if (Block->isCold())
      continue;

    unsigned Index = 0;
    auto It = Block->begin();
    // Move on to the next instruction of the current block.
    const auto Advance = [&Index, &It] {
      ++Index;
      ++It;
    };

    while (It != Block->end()) {
      auto &Inst = *It;

      // Only calls whose single operand is an expression are considered.
      const bool IsDirectCall = BC.MIA->isCall(Inst) &&
                                Inst.size() == 1 &&
                                Inst.getOperand(0).isExpr();
      if (!IsDirectCall) {
        Advance();
        continue;
      }

      assert(!BC.MIA->isInvoke(Inst));
      auto Target = dyn_cast<MCSymbolRefExpr>(
          Inst.getOperand(0).getExpr());
      assert(Target && "Not MCSymbolRefExpr");

      auto Found = FunctionByName.find(Target->getSymbol().getName());
      if (Found == FunctionByName.end()) {
        Advance();
        continue;
      }

      auto &Callee = *Found->second;
      const bool IsCandidate = InliningCandidates.count(Callee.getName());
      totalInlineableCalls += IsCandidate * Block->getExecutionCount();

      if (!IsCandidate ||
          !(Callee.getSize() + ExtraSize
            + Function.estimateHotSize() < Function.getMaxSize())) {
        Advance();
        continue;
      }

      BinaryBasicBlock *ResumeBB = nullptr;
      unsigned ResumeIndex = 0;
      std::tie(ResumeBB, ResumeIndex) =
          inlineCall(BC, Function, Block, Index, Callee);
      DidInlining = true;
      DEBUG(errs() << "BOLT-DEBUG: Inlining call to "
                   << Callee.getName() << " in "
                   << Function.getName() << "\n");

      // Resume right after the inlined code if it stayed in this block;
      // otherwise this block is finished.
      if (ResumeBB == Block) {
        Index = ResumeIndex;
        It = Block->begin() + ResumeIndex;
      } else {
        Index = Block->size();
        It = Block->end();
      }
      ExtraSize += Callee.getSize();
      inlinedDynamicCalls += Block->getExecutionCount();
    }
  }

  return DidInlining;
}
void InlineSmallFunctions::runOnFunctions(
@ -295,17 +682,30 @@ void InlineSmallFunctions::runOnFunctions(
for (auto &It : BFs) {
FunctionByName[It.second.getName()] = &It.second;
}
findInliningCandidates(BC, BFs);
uint32_t ConsideredFunctions = 0;
std::vector<BinaryFunction *> ConsideredFunctions;
for (auto &It : BFs) {
auto &Function = It.second;
if (!Function.isSimple() || !opts::shouldProcess(Function))
continue;
if (ConsideredFunctions == kMaxFunctions)
break;
inlineCallsInFunction(BC, Function);
++ConsideredFunctions;
if (Function.getExecutionCount() == BinaryFunction::COUNT_NO_PROFILE)
continue;
ConsideredFunctions.push_back(&Function);
}
std::sort(ConsideredFunctions.begin(), ConsideredFunctions.end(),
[](BinaryFunction *A, BinaryFunction *B) {
return B->getExecutionCount() < A->getExecutionCount();
});
unsigned ModifiedFunctions = 0;
for (unsigned i = 0; i < ConsideredFunctions.size() &&
ModifiedFunctions <= kMaxFunctions; ++i) {
auto &Function = *ConsideredFunctions[i];
if (inlineCallsInFunction(BC, Function))
++ModifiedFunctions;
}
DEBUG(errs() << "BOLT-DEBUG: Inlined " << inlinedDynamicCalls << " of "
<< totalDynamicCalls << " function calls in the profile.\n");
DEBUG(errs() << "BOLT-DEBUG: Inlined calls represent "

View File

@ -64,8 +64,11 @@ private:
/// Maximum number of instructions in an inlined function.
static const unsigned kMaxInstructions = 8;
/// Maximum code size (in bytes) of inlined function (used by aggressive
/// inlining).
static const uint64_t kMaxSize = 60;
/// Maximum number of functions that will be considered for inlining (in
/// ascending address order).
/// descending hotness order).
static const unsigned kMaxFunctions = 30000;
/// Statistics collected for debugging.
@ -83,9 +86,32 @@ private:
MCInst *CallInst,
const BinaryBasicBlock &InlinedFunctionBB);
void inlineCallsInFunction(BinaryContext &BC,
bool inlineCallsInFunction(BinaryContext &BC,
BinaryFunction &Function);
/// The following methods do a more aggressive inlining pass, where we
/// inline calls as well as tail calls and we are not limited to inlining
/// functions with only one basic block.
/// FIXME: Currently these are broken since they do not work with the split
/// function option.
void findInliningCandidatesAggressive(
BinaryContext &BC, const std::map<uint64_t, BinaryFunction> &BFs);
bool inlineCallsInFunctionAggressive(
BinaryContext &BC, BinaryFunction &Function);
/// Inline the call at instruction index \p CallInstIndex of \p CallerBB (a
/// basic block of \p CallerFunction) with the code of \p InlinedFunction.
/// The inlined function should not contain any landing pad or thrower edges
/// but can have more than one basic block.
///
/// Return the location (basic block and instruction index) where the code of
/// the caller function continues after the inlined code.
std::pair<BinaryBasicBlock *, unsigned>
inlineCall(BinaryContext &BC,
           BinaryFunction &CallerFunction,
           BinaryBasicBlock *CallerBB,
           const unsigned CallInstIndex,
           const BinaryFunction &InlinedFunction);
public:
void runOnFunctions(BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs,