forked from OSchip/llvm-project
Add a flag to experiment with outlining optional branches.
In a CFG with the edges A->B->C and A->C, B is an optional branch. LLVM's default behavior is to lay the blocks out naturally, i.e. A, B, C, in order to improve code locality and fallthroughs. However, if a function contains many of those optional branches only a few of which are taken, this leads to a lot of unnecessary icache misses. Moving B out of line can work around this. Review: http://reviews.llvm.org/D7719 llvm-svn: 231230
This commit is contained in:
parent
783cbdcd89
commit
471e856f49
|
@ -33,6 +33,7 @@
|
||||||
#include "llvm/CodeGen/MachineBasicBlock.h"
|
#include "llvm/CodeGen/MachineBasicBlock.h"
|
||||||
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
|
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
|
||||||
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
|
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
|
||||||
|
#include "llvm/CodeGen/MachineDominators.h"
|
||||||
#include "llvm/CodeGen/MachineFunction.h"
|
#include "llvm/CodeGen/MachineFunction.h"
|
||||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||||
#include "llvm/CodeGen/MachineLoopInfo.h"
|
#include "llvm/CodeGen/MachineLoopInfo.h"
|
||||||
|
@ -67,6 +68,12 @@ ExitBlockBias("block-placement-exit-block-bias",
|
||||||
"over the original exit to be considered the new exit."),
|
"over the original exit to be considered the new exit."),
|
||||||
cl::init(0), cl::Hidden);
|
cl::init(0), cl::Hidden);
|
||||||
|
|
||||||
|
static cl::opt<bool> OutlineOptionalBranches(
|
||||||
|
"outline-optional-branches",
|
||||||
|
cl::desc("Put completely optional branches, i.e. branches with a common "
|
||||||
|
"post dominator, out of line."),
|
||||||
|
cl::init(false), cl::Hidden);
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
class BlockChain;
|
class BlockChain;
|
||||||
/// \brief Type for our function-wide basic block -> block chain mapping.
|
/// \brief Type for our function-wide basic block -> block chain mapping.
|
||||||
|
@ -188,6 +195,13 @@ class MachineBlockPlacement : public MachineFunctionPass {
|
||||||
/// \brief A handle to the target's lowering info.
|
/// \brief A handle to the target's lowering info.
|
||||||
const TargetLoweringBase *TLI;
|
const TargetLoweringBase *TLI;
|
||||||
|
|
||||||
|
/// \brief A handle to the post dominator tree.
|
||||||
|
MachineDominatorTree *MDT;
|
||||||
|
|
||||||
|
/// \brief A set of blocks that are unavoidably execute, i.e. they dominate
|
||||||
|
/// all terminators of the MachineFunction.
|
||||||
|
SmallPtrSet<MachineBasicBlock *, 4> UnavoidableBlocks;
|
||||||
|
|
||||||
/// \brief Allocator and owner of BlockChain structures.
|
/// \brief Allocator and owner of BlockChain structures.
|
||||||
///
|
///
|
||||||
/// We build BlockChains lazily while processing the loop structure of
|
/// We build BlockChains lazily while processing the loop structure of
|
||||||
|
@ -244,6 +258,7 @@ public:
|
||||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||||
AU.addRequired<MachineBranchProbabilityInfo>();
|
AU.addRequired<MachineBranchProbabilityInfo>();
|
||||||
AU.addRequired<MachineBlockFrequencyInfo>();
|
AU.addRequired<MachineBlockFrequencyInfo>();
|
||||||
|
AU.addRequired<MachineDominatorTree>();
|
||||||
AU.addRequired<MachineLoopInfo>();
|
AU.addRequired<MachineLoopInfo>();
|
||||||
MachineFunctionPass::getAnalysisUsage(AU);
|
MachineFunctionPass::getAnalysisUsage(AU);
|
||||||
}
|
}
|
||||||
|
@ -256,6 +271,7 @@ INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement2",
|
||||||
"Branch Probability Basic Block Placement", false, false)
|
"Branch Probability Basic Block Placement", false, false)
|
||||||
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
|
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
|
||||||
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
|
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
|
||||||
|
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
|
||||||
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
|
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
|
||||||
INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement2",
|
INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement2",
|
||||||
"Branch Probability Basic Block Placement", false, false)
|
"Branch Probability Basic Block Placement", false, false)
|
||||||
|
@ -363,6 +379,13 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor(
|
||||||
uint32_t SuccWeight = MBPI->getEdgeWeight(BB, Succ);
|
uint32_t SuccWeight = MBPI->getEdgeWeight(BB, Succ);
|
||||||
BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
|
BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
|
||||||
|
|
||||||
|
// If we outline optional branches, look whether Succ is unavoidable, i.e.
|
||||||
|
// dominates all terminators of the MachineFunction. If it does, other
|
||||||
|
// successors must be optional. Don't do this for cold branches.
|
||||||
|
if (OutlineOptionalBranches && SuccProb > HotProb.getCompl() &&
|
||||||
|
UnavoidableBlocks.count(Succ) > 0)
|
||||||
|
return Succ;
|
||||||
|
|
||||||
// Only consider successors which are either "hot", or wouldn't violate
|
// Only consider successors which are either "hot", or wouldn't violate
|
||||||
// any CFG constraints.
|
// any CFG constraints.
|
||||||
if (SuccChain.LoopPredecessors != 0) {
|
if (SuccChain.LoopPredecessors != 0) {
|
||||||
|
@ -481,8 +504,7 @@ MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
|
||||||
}
|
}
|
||||||
|
|
||||||
void MachineBlockPlacement::buildChain(
|
void MachineBlockPlacement::buildChain(
|
||||||
MachineBasicBlock *BB,
|
MachineBasicBlock *BB, BlockChain &Chain,
|
||||||
BlockChain &Chain,
|
|
||||||
SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
|
SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
|
||||||
const BlockFilterSet *BlockFilter) {
|
const BlockFilterSet *BlockFilter) {
|
||||||
assert(BB);
|
assert(BB);
|
||||||
|
@ -899,6 +921,27 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (OutlineOptionalBranches) {
|
||||||
|
// Find the nearest common dominator of all of F's terminators.
|
||||||
|
MachineBasicBlock *Terminator = nullptr;
|
||||||
|
for (MachineBasicBlock &MBB : F) {
|
||||||
|
if (MBB.succ_size() == 0) {
|
||||||
|
if (Terminator == nullptr)
|
||||||
|
Terminator = &MBB;
|
||||||
|
else
|
||||||
|
Terminator = MDT->findNearestCommonDominator(Terminator, &MBB);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// MBBs dominating this common dominator are unavoidable.
|
||||||
|
UnavoidableBlocks.clear();
|
||||||
|
for (MachineBasicBlock &MBB : F) {
|
||||||
|
if (MDT->dominates(&MBB, Terminator)) {
|
||||||
|
UnavoidableBlocks.insert(&MBB);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Build any loop-based chains.
|
// Build any loop-based chains.
|
||||||
for (MachineLoopInfo::iterator LI = MLI->begin(), LE = MLI->end(); LI != LE;
|
for (MachineLoopInfo::iterator LI = MLI->begin(), LE = MLI->end(); LI != LE;
|
||||||
++LI)
|
++LI)
|
||||||
|
@ -1110,6 +1153,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) {
|
||||||
MLI = &getAnalysis<MachineLoopInfo>();
|
MLI = &getAnalysis<MachineLoopInfo>();
|
||||||
TII = F.getSubtarget().getInstrInfo();
|
TII = F.getSubtarget().getInstrInfo();
|
||||||
TLI = F.getSubtarget().getTargetLowering();
|
TLI = F.getSubtarget().getTargetLowering();
|
||||||
|
MDT = &getAnalysis<MachineDominatorTree>();
|
||||||
assert(BlockToChain.empty());
|
assert(BlockToChain.empty());
|
||||||
|
|
||||||
buildCFGChains(F);
|
buildCFGChains(F);
|
||||||
|
|
|
@ -0,0 +1,50 @@
|
||||||
|
; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux < %s | FileCheck %s -check-prefix=CHECK
|
||||||
|
; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux -outline-optional-branches < %s | FileCheck %s -check-prefix=CHECK-OUTLINE
|
||||||
|
|
||||||
|
define void @foo(i32 %t1, i32 %t2) {
|
||||||
|
; Test that we lift the call to 'c' up to immediately follow the call to 'b'
|
||||||
|
; when we disable the cfg conflict check.
|
||||||
|
;
|
||||||
|
; CHECK-LABEL: foo:
|
||||||
|
; CHECK: callq a
|
||||||
|
; CHECK: callq b
|
||||||
|
; CHECK: callq c
|
||||||
|
; CHECK: callq d
|
||||||
|
;
|
||||||
|
; CHECK-OUTLINE-LABEL: foo:
|
||||||
|
; CHECK-OUTLINE: callq b
|
||||||
|
; CHECK-OUTLINE: callq c
|
||||||
|
; CHECK-OUTLINE: callq d
|
||||||
|
; CHECK-OUTLINE: callq a
|
||||||
|
|
||||||
|
entry:
|
||||||
|
%cmp = icmp eq i32 %t1, 0
|
||||||
|
br i1 %cmp, label %if.then, label %if.end
|
||||||
|
|
||||||
|
if.then:
|
||||||
|
call void @a()
|
||||||
|
br label %if.end
|
||||||
|
|
||||||
|
if.end:
|
||||||
|
call void @b()
|
||||||
|
br label %hotbranch
|
||||||
|
|
||||||
|
hotbranch:
|
||||||
|
%cmp2 = icmp eq i32 %t2, 0
|
||||||
|
br i1 %cmp2, label %if.then2, label %if.end2, !prof !1
|
||||||
|
|
||||||
|
if.then2:
|
||||||
|
call void @c()
|
||||||
|
br label %if.end2
|
||||||
|
|
||||||
|
if.end2:
|
||||||
|
call void @d()
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
declare void @a()
|
||||||
|
declare void @b()
|
||||||
|
declare void @c()
|
||||||
|
declare void @d()
|
||||||
|
|
||||||
|
!1 = !{!"branch_weights", i32 64, i32 4}
|
Loading…
Reference in New Issue