llvm-project/bolt/lib/Passes/TailDuplication.cpp

476 lines
18 KiB
C++
Raw Normal View History

//===--------- Passes/TailDuplication.cpp -------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
#include "bolt/Passes/TailDuplication.h"
#include <numeric>
#define DEBUG_TYPE "taildup"
using namespace llvm;
namespace opts {
extern cl::OptionCategory BoltOptCategory;
static cl::opt<bool> TailDuplicationAggressive(
"tail-duplication-aggressive",
cl::desc("tail duplication should act aggressively in duplicating multiple "
"blocks per tail"),
cl::ZeroOrMore, cl::ReallyHidden, cl::init(false),
cl::cat(BoltOptCategory));
static cl::opt<unsigned>
TailDuplicationMinimumOffset("tail-duplication-minimum-offset",
cl::desc("minimum offset needed between block "
"and successor to allow duplication"),
cl::ZeroOrMore, cl::ReallyHidden, cl::init(64),
cl::cat(BoltOptCategory));
static cl::opt<unsigned> TailDuplicationMaximumDuplication(
"tail-duplication-maximum-duplication",
cl::desc("maximum size of duplicated blocks (in bytes)"), cl::ZeroOrMore,
cl::ReallyHidden, cl::init(64), cl::cat(BoltOptCategory));
static cl::opt<bool> TailDuplicationConstCopyPropagation(
"tail-duplication-const-copy-propagation",
cl::desc("enable const and copy propagation after tail duplication"),
cl::ReallyHidden, cl::init(false), cl::cat(BoltOptCategory));
} // namespace opts
namespace llvm {
namespace bolt {
void TailDuplication::getCallerSavedRegs(const MCInst &Inst, BitVector &Regs,
BinaryContext &BC) const {
if (!BC.MIB->isCall(Inst))
return;
BitVector CallRegs = BitVector(BC.MRI->getNumRegs(), false);
BC.MIB->getCalleeSavedRegs(CallRegs);
CallRegs.flip();
Regs |= CallRegs;
}
bool TailDuplication::regIsPossiblyOverwritten(const MCInst &Inst, unsigned Reg,
BinaryContext &BC) const {
BitVector WrittenRegs = BitVector(BC.MRI->getNumRegs(), false);
BC.MIB->getWrittenRegs(Inst, WrittenRegs);
getCallerSavedRegs(Inst, WrittenRegs, BC);
if (BC.MIB->isRep(Inst))
BC.MIB->getRepRegs(WrittenRegs);
WrittenRegs &= BC.MIB->getAliases(Reg, false);
return WrittenRegs.any();
}
bool TailDuplication::regIsDefinitelyOverwritten(const MCInst &Inst,
unsigned Reg,
BinaryContext &BC) const {
BitVector WrittenRegs = BitVector(BC.MRI->getNumRegs(), false);
BC.MIB->getWrittenRegs(Inst, WrittenRegs);
getCallerSavedRegs(Inst, WrittenRegs, BC);
if (BC.MIB->isRep(Inst))
BC.MIB->getRepRegs(WrittenRegs);
return (!regIsUsed(Inst, Reg, BC) && WrittenRegs.test(Reg) &&
!BC.MIB->isConditionalMove(Inst));
}
bool TailDuplication::regIsUsed(const MCInst &Inst, unsigned Reg,
BinaryContext &BC) const {
BitVector SrcRegs = BitVector(BC.MRI->getNumRegs(), false);
BC.MIB->getSrcRegs(Inst, SrcRegs);
SrcRegs &= BC.MIB->getAliases(Reg, true);
return SrcRegs.any();
}
bool TailDuplication::isOverwrittenBeforeUsed(BinaryBasicBlock &StartBB,
unsigned Reg) const {
BinaryFunction *BF = StartBB.getFunction();
BinaryContext &BC = BF->getBinaryContext();
std::queue<BinaryBasicBlock *> Q;
for (auto Itr = StartBB.succ_begin(); Itr != StartBB.succ_end(); ++Itr) {
BinaryBasicBlock *NextBB = *Itr;
Q.push(NextBB);
}
std::set<BinaryBasicBlock *> Visited;
// Breadth first search through successive blocks and see if Reg is ever used
// before its overwritten
while (Q.size() > 0) {
BinaryBasicBlock *CurrBB = Q.front();
Q.pop();
if (Visited.count(CurrBB))
continue;
Visited.insert(CurrBB);
bool Overwritten = false;
for (auto Itr = CurrBB->begin(); Itr != CurrBB->end(); ++Itr) {
MCInst &Inst = *Itr;
if (regIsUsed(Inst, Reg, BC))
return false;
if (regIsDefinitelyOverwritten(Inst, Reg, BC)) {
Overwritten = true;
break;
}
}
if (Overwritten)
continue;
for (auto Itr = CurrBB->succ_begin(); Itr != CurrBB->succ_end(); ++Itr) {
BinaryBasicBlock *NextBB = *Itr;
Q.push(NextBB);
}
}
return true;
}
void TailDuplication::constantAndCopyPropagate(
BinaryBasicBlock &OriginalBB,
std::vector<BinaryBasicBlock *> &BlocksToPropagate) {
BinaryFunction *BF = OriginalBB.getFunction();
BinaryContext &BC = BF->getBinaryContext();
BlocksToPropagate.insert(BlocksToPropagate.begin(), &OriginalBB);
// Iterate through the original instructions to find one to propagate
for (auto Itr = OriginalBB.begin(); Itr != OriginalBB.end(); ++Itr) {
MCInst &OriginalInst = *Itr;
// It must be a non conditional
if (BC.MIB->isConditionalMove(OriginalInst))
continue;
// Move immediate or move register
if ((!BC.MII->get(OriginalInst.getOpcode()).isMoveImmediate() ||
!OriginalInst.getOperand(1).isImm()) &&
(!BC.MII->get(OriginalInst.getOpcode()).isMoveReg() ||
!OriginalInst.getOperand(1).isReg()))
continue;
// True if this is constant propagation and not copy propagation
bool ConstantProp = BC.MII->get(OriginalInst.getOpcode()).isMoveImmediate();
// The Register to replaced
unsigned Reg = OriginalInst.getOperand(0).getReg();
// True if the register to replace was replaced everywhere it was used
bool ReplacedEverywhere = true;
// True if the register was definitely overwritten
bool Overwritten = false;
// True if the register to replace and the register to replace with (for
// copy propagation) has not been overwritten and is still usable
bool RegsActive = true;
// Iterate through successor blocks and through their instructions
for (BinaryBasicBlock *NextBB : BlocksToPropagate) {
for (auto PropagateItr =
((NextBB == &OriginalBB) ? Itr + 1 : NextBB->begin());
PropagateItr < NextBB->end(); ++PropagateItr) {
MCInst &PropagateInst = *PropagateItr;
if (regIsUsed(PropagateInst, Reg, BC)) {
bool Replaced = false;
// If both registers are active for copy propagation or the register
// to replace is active for constant propagation
if (RegsActive) {
// Set Replaced and so ReplacedEverwhere to false if it cannot be
// replaced (no replacing that opcode, Register is src and dest)
if (ConstantProp) {
Replaced = BC.MIB->replaceRegWithImm(
PropagateInst, Reg, OriginalInst.getOperand(1).getImm());
} else {
Replaced = BC.MIB->replaceRegWithReg(
PropagateInst, Reg, OriginalInst.getOperand(1).getReg());
}
}
ReplacedEverywhere = ReplacedEverywhere && Replaced;
}
// For copy propagation, make sure no propagation happens after the
// register to replace with is overwritten
if (!ConstantProp &&
regIsPossiblyOverwritten(PropagateInst,
OriginalInst.getOperand(1).getReg(), BC)) {
RegsActive = false;
}
// Make sure no propagation happens after the register to replace is
// overwritten
if (regIsPossiblyOverwritten(PropagateInst, Reg, BC)) {
RegsActive = false;
}
// Record if the register to replace is overwritten
if (regIsDefinitelyOverwritten(PropagateInst, Reg, BC)) {
Overwritten = true;
break;
}
}
if (Overwritten)
break;
}
// If the register was replaced everwhere and it was overwritten in either
// one of the iterated through blocks or one of the successor blocks, delete
// the original move instruction
if (ReplacedEverywhere &&
(Overwritten ||
isOverwrittenBeforeUsed(
*BlocksToPropagate[BlocksToPropagate.size() - 1], Reg))) {
// If both registers are active for copy propagation or the register
// to replace is active for constant propagation
StaticInstructionDeletionCount++;
DynamicInstructionDeletionCount += OriginalBB.getExecutionCount();
Itr = std::prev(OriginalBB.eraseInstruction(Itr));
}
}
}
bool TailDuplication::isInCacheLine(const BinaryBasicBlock &BB,
const BinaryBasicBlock &Succ) const {
if (&BB == &Succ)
return true;
BinaryFunction::BasicBlockOrderType BlockLayout =
BB.getFunction()->getLayout();
uint64_t Distance = 0;
int Direction = (Succ.getLayoutIndex() > BB.getLayoutIndex()) ? 1 : -1;
for (unsigned I = BB.getLayoutIndex() + Direction; I != Succ.getLayoutIndex();
I += Direction) {
Distance += BlockLayout[I]->getOriginalSize();
if (Distance > opts::TailDuplicationMinimumOffset)
return false;
}
return true;
}
std::vector<BinaryBasicBlock *>
TailDuplication::moderateCodeToDuplicate(BinaryBasicBlock &BB) const {
std::vector<BinaryBasicBlock *> BlocksToDuplicate;
if (BB.hasJumpTable())
return BlocksToDuplicate;
if (BB.getOriginalSize() > opts::TailDuplicationMaximumDuplication)
return BlocksToDuplicate;
for (auto Itr = BB.succ_begin(); Itr != BB.succ_end(); ++Itr) {
if ((*Itr)->getLayoutIndex() == BB.getLayoutIndex() + 1)
// If duplicating would introduce a new branch, don't duplicate
return BlocksToDuplicate;
}
BlocksToDuplicate.push_back(&BB);
return BlocksToDuplicate;
}
std::vector<BinaryBasicBlock *>
TailDuplication::aggressiveCodeToDuplicate(BinaryBasicBlock &BB) const {
std::vector<BinaryBasicBlock *> BlocksToDuplicate;
BinaryBasicBlock *CurrBB = &BB;
while (CurrBB) {
LLVM_DEBUG(dbgs() << "Aggressive tail duplication: adding "
<< CurrBB->getName() << " to duplication list\n";);
BlocksToDuplicate.push_back(CurrBB);
if (CurrBB->hasJumpTable()) {
LLVM_DEBUG(dbgs() << "Aggressive tail duplication: clearing duplication "
"list due to a JT in "
<< CurrBB->getName() << '\n';);
BlocksToDuplicate.clear();
break;
}
// With no successors, we've reached the end and should duplicate all of
// BlocksToDuplicate
if (CurrBB->succ_size() == 0)
break;
// With two successors, if they're both a jump, we should duplicate all
// blocks in BlocksToDuplicate. Otherwise, we cannot find a simple stream of
// blocks to copy
if (CurrBB->succ_size() >= 2) {
if (CurrBB->getConditionalSuccessor(false)->getLayoutIndex() ==
CurrBB->getLayoutIndex() + 1 ||
CurrBB->getConditionalSuccessor(true)->getLayoutIndex() ==
CurrBB->getLayoutIndex() + 1) {
LLVM_DEBUG(dbgs() << "Aggressive tail duplication: clearing "
"duplication list, can't find a simple stream at "
<< CurrBB->getName() << '\n';);
BlocksToDuplicate.clear();
}
break;
}
// With one successor, if its a jump, we should duplicate all blocks in
// BlocksToDuplicate. Otherwise, we should keep going
BinaryBasicBlock *Succ = CurrBB->getSuccessor();
if (Succ->getLayoutIndex() != CurrBB->getLayoutIndex() + 1)
break;
CurrBB = Succ;
}
// Don't duplicate if its too much code
unsigned DuplicationByteCount = std::accumulate(
std::begin(BlocksToDuplicate), std::end(BlocksToDuplicate), 0,
[](int value, BinaryBasicBlock *p) {
return value + p->getOriginalSize();
});
if (DuplicationByteCount > opts::TailDuplicationMaximumDuplication) {
LLVM_DEBUG(dbgs() << "Aggressive tail duplication: duplication byte count ("
<< DuplicationByteCount << ") exceeds maximum "
<< opts::TailDuplicationMaximumDuplication << '\n';);
BlocksToDuplicate.clear();
}
LLVM_DEBUG(dbgs() << "Aggressive tail duplication: found "
<< BlocksToDuplicate.size() << " blocks to duplicate\n";);
return BlocksToDuplicate;
}
std::vector<BinaryBasicBlock *> TailDuplication::tailDuplicate(
BinaryBasicBlock &BB,
const std::vector<BinaryBasicBlock *> &BlocksToDuplicate) const {
BinaryFunction *BF = BB.getFunction();
BinaryContext &BC = BF->getBinaryContext();
// Ratio of this new branches execution count to the total size of the
// successor's execution count. Used to set this new branches execution count
// and lower the old successor's execution count
double ExecutionCountRatio =
BB.getExecutionCount() > BB.getSuccessor()->getExecutionCount()
? 1.0
: (double)BB.getExecutionCount() /
BB.getSuccessor()->getExecutionCount();
// Use the last branch info when adding a successor to LastBB
BinaryBasicBlock::BinaryBranchInfo &LastBI =
BB.getBranchInfo(*(BB.getSuccessor()));
BinaryBasicBlock *LastOriginalBB = &BB;
BinaryBasicBlock *LastDuplicatedBB = &BB;
assert(LastDuplicatedBB->succ_size() == 1 &&
"tail duplication cannot act on a block with more than 1 successor");
LastDuplicatedBB->removeSuccessor(LastDuplicatedBB->getSuccessor());
std::vector<std::unique_ptr<BinaryBasicBlock>> DuplicatedBlocks;
std::vector<BinaryBasicBlock *> DuplicatedBlocksToReturn;
for (BinaryBasicBlock *CurrBB : BlocksToDuplicate) {
DuplicatedBlocks.emplace_back(
BF->createBasicBlock(0, (BC.Ctx)->createNamedTempSymbol("tail-dup")));
BinaryBasicBlock *NewBB = DuplicatedBlocks.back().get();
NewBB->addInstructions(CurrBB->begin(), CurrBB->end());
// Set execution count as if it was just a copy of the original
NewBB->setExecutionCount(
std::max((uint64_t)1, CurrBB->getExecutionCount()));
LastDuplicatedBB->addSuccessor(NewBB, LastBI);
DuplicatedBlocksToReturn.push_back(NewBB);
// As long as its not the first block, adjust both original and duplicated
// to what they should be
if (LastDuplicatedBB != &BB) {
LastOriginalBB->adjustExecutionCount(1.0 - ExecutionCountRatio);
LastDuplicatedBB->adjustExecutionCount(ExecutionCountRatio);
}
if (CurrBB->succ_size() == 1)
LastBI = CurrBB->getBranchInfo(*(CurrBB->getSuccessor()));
LastOriginalBB = CurrBB;
LastDuplicatedBB = NewBB;
}
LastDuplicatedBB->addSuccessors(
LastOriginalBB->succ_begin(), LastOriginalBB->succ_end(),
LastOriginalBB->branch_info_begin(), LastOriginalBB->branch_info_end());
LastOriginalBB->adjustExecutionCount(1.0 - ExecutionCountRatio);
LastDuplicatedBB->adjustExecutionCount(ExecutionCountRatio);
BF->insertBasicBlocks(&BB, std::move(DuplicatedBlocks));
return DuplicatedBlocksToReturn;
}
void TailDuplication::runOnFunction(BinaryFunction &Function) {
// New blocks will be added and layout will change,
// so make a copy here to iterate over the original layout
BinaryFunction::BasicBlockOrderType BlockLayout = Function.getLayout();
for (BinaryBasicBlock *BB : BlockLayout) {
if (BB->succ_size() == 1 &&
BB->getSuccessor()->getLayoutIndex() != BB->getLayoutIndex() + 1)
UnconditionalBranchDynamicCount += BB->getExecutionCount();
if (BB->succ_size() == 2 &&
BB->getFallthrough()->getLayoutIndex() != BB->getLayoutIndex() + 1)
UnconditionalBranchDynamicCount += BB->getFallthroughBranchInfo().Count;
AllBlocksDynamicCount += BB->getExecutionCount();
// The block must be hot
if (BB->getExecutionCount() == 0)
continue;
// with one successor
if (BB->succ_size() != 1)
continue;
// no jump table
if (BB->hasJumpTable())
continue;
// Skip not-in-layout, i.e. unreachable, blocks.
if (BB->getLayoutIndex() >= BlockLayout.size())
continue;
// and we are estimating that this sucessor is not already in the same cache
// line
BinaryBasicBlock *Succ = BB->getSuccessor();
if (isInCacheLine(*BB, *Succ))
continue;
std::vector<BinaryBasicBlock *> BlocksToDuplicate;
if (opts::TailDuplicationAggressive)
BlocksToDuplicate = aggressiveCodeToDuplicate(*Succ);
else
BlocksToDuplicate = moderateCodeToDuplicate(*Succ);
if (BlocksToDuplicate.size() == 0)
continue;
PossibleDuplications++;
PossibleDuplicationsDynamicCount += BB->getExecutionCount();
std::vector<BinaryBasicBlock *> DuplicatedBlocks =
tailDuplicate(*BB, BlocksToDuplicate);
if (!opts::TailDuplicationConstCopyPropagation)
continue;
constantAndCopyPropagate(*BB, DuplicatedBlocks);
BinaryBasicBlock *FirstBB = BlocksToDuplicate[0];
if (FirstBB->pred_size() == 1) {
BinaryBasicBlock *PredBB = *FirstBB->pred_begin();
if (PredBB->succ_size() == 1)
constantAndCopyPropagate(*PredBB, BlocksToDuplicate);
}
}
}
void TailDuplication::runOnFunctions(BinaryContext &BC) {
for (auto &It : BC.getBinaryFunctions()) {
BinaryFunction &Function = It.second;
if (!shouldOptimize(Function))
continue;
runOnFunction(Function);
}
outs() << "BOLT-INFO: tail duplication possible duplications: "
<< PossibleDuplications << "\n";
outs() << "BOLT-INFO: tail duplication possible dynamic reductions: "
<< PossibleDuplicationsDynamicCount << "\n";
outs() << "BOLT-INFO: tail duplication possible dynamic reductions to "
"unconditional branch execution : "
<< format("%.1f", ((float)PossibleDuplicationsDynamicCount * 100.0f) /
UnconditionalBranchDynamicCount)
<< "%\n";
outs() << "BOLT-INFO: tail duplication possible dynamic reductions to all "
"blocks execution : "
<< format("%.1f", ((float)PossibleDuplicationsDynamicCount * 100.0f) /
AllBlocksDynamicCount)
<< "%\n";
outs() << "BOLT-INFO: tail duplication static propagation deletions: "
<< StaticInstructionDeletionCount << "\n";
outs() << "BOLT-INFO: tail duplication dynamic propagation deletions: "
<< DynamicInstructionDeletionCount << "\n"; //
}
} // end namespace bolt
} // end namespace llvm