forked from OSchip/llvm-project
[MBP] Clean up of the comments, and a first attempt to better describe a part
of the algorithm. Differential Revision: https://reviews.llvm.org/D22364 llvm-svn: 275595
This commit is contained in:
parent
27fefb2fcf
commit
fd0ad4e193
|
@ -109,7 +109,7 @@ static cl::opt<bool>
|
||||||
|
|
||||||
static cl::opt<unsigned> MisfetchCost(
|
static cl::opt<unsigned> MisfetchCost(
|
||||||
"misfetch-cost",
|
"misfetch-cost",
|
||||||
cl::desc("Cost that models the probablistic risk of an instruction "
|
cl::desc("Cost that models the probabilistic risk of an instruction "
|
||||||
"misfetch due to a jump comparing to falling through, whose cost "
|
"misfetch due to a jump comparing to falling through, whose cost "
|
||||||
"is zero."),
|
"is zero."),
|
||||||
cl::init(1), cl::Hidden);
|
cl::init(1), cl::Hidden);
|
||||||
|
@ -310,7 +310,7 @@ class MachineBlockPlacement : public MachineFunctionPass {
|
||||||
MachineFunction::iterator &PrevUnplacedBlockIt,
|
MachineFunction::iterator &PrevUnplacedBlockIt,
|
||||||
const BlockFilterSet *BlockFilter);
|
const BlockFilterSet *BlockFilter);
|
||||||
|
|
||||||
/// \brief Add a basic block to the work list if it is apropriate.
|
/// \brief Add a basic block to the work list if it is appropriate.
|
||||||
///
|
///
|
||||||
/// If the optional parameter BlockFilter is provided, only MBB
|
/// If the optional parameter BlockFilter is provided, only MBB
|
||||||
/// present in the set will be added to the worklist. If nullptr
|
/// present in the set will be added to the worklist. If nullptr
|
||||||
|
@ -438,7 +438,7 @@ BranchProbability MachineBlockPlacement::collectViableSuccessors(
|
||||||
// Assume A->C is very hot (>90%), and C->D has a 50% probability, then after
|
// Assume A->C is very hot (>90%), and C->D has a 50% probability, then after
|
||||||
// A->C is chosen as a fall-through, D won't be selected as a successor of C
|
// A->C is chosen as a fall-through, D won't be selected as a successor of C
|
||||||
// due to CFG constraint (the probability of C->D is not greater than
|
// due to CFG constraint (the probability of C->D is not greater than
|
||||||
// HotProb to break top-oorder). If we exclude E that is not in BlockFilter
|
// HotProb to break top-order). If we exclude E that is not in BlockFilter
|
||||||
// when calculating the probability of C->D, D will be selected and we
|
// when calculating the probability of C->D, D will be selected and we
|
||||||
// will get A C D B as the layout of this loop.
|
// will get A C D B as the layout of this loop.
|
||||||
auto AdjustedSumProb = BranchProbability::getOne();
|
auto AdjustedSumProb = BranchProbability::getOne();
|
||||||
|
@ -466,7 +466,6 @@ BranchProbability MachineBlockPlacement::collectViableSuccessors(
|
||||||
|
|
||||||
/// The helper function returns the branch probability that is adjusted
|
/// The helper function returns the branch probability that is adjusted
|
||||||
/// or normalized over the new total \p AdjustedSumProb.
|
/// or normalized over the new total \p AdjustedSumProb.
|
||||||
|
|
||||||
static BranchProbability
|
static BranchProbability
|
||||||
getAdjustedProbability(BranchProbability OrigProb,
|
getAdjustedProbability(BranchProbability OrigProb,
|
||||||
BranchProbability AdjustedSumProb) {
|
BranchProbability AdjustedSumProb) {
|
||||||
|
@ -484,7 +483,7 @@ getAdjustedProbability(BranchProbability OrigProb,
|
||||||
/// When the option OutlineOptionalBranches is on, this method
|
/// When the option OutlineOptionalBranches is on, this method
|
||||||
/// checks if the fallthrough candidate block \p Succ (of block
|
/// checks if the fallthrough candidate block \p Succ (of block
|
||||||
/// \p BB) also has other unscheduled predecessor blocks which
|
/// \p BB) also has other unscheduled predecessor blocks which
|
||||||
/// are also successors of \p BB (forming triagular shape CFG).
|
/// are also successors of \p BB (forming triangular shape CFG).
|
||||||
/// If none of such predecessors are small, it returns true.
|
/// If none of such predecessors are small, it returns true.
|
||||||
/// The caller can choose to select \p Succ as the layout successors
|
/// The caller can choose to select \p Succ as the layout successors
|
||||||
/// so that \p Succ's predecessors (optional branches) can be
|
/// so that \p Succ's predecessors (optional branches) can be
|
||||||
|
@ -550,13 +549,13 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
|
||||||
BranchProbability SuccProb, BranchProbability RealSuccProb,
|
BranchProbability SuccProb, BranchProbability RealSuccProb,
|
||||||
BlockChain &Chain, const BlockFilterSet *BlockFilter) {
|
BlockChain &Chain, const BlockFilterSet *BlockFilter) {
|
||||||
|
|
||||||
// This is no global conflict, just return false.
|
// There isn't a better layout when there are no unscheduled predecessors.
|
||||||
if (SuccChain.UnscheduledPredecessors == 0)
|
if (SuccChain.UnscheduledPredecessors == 0)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
// There are two basic scenarios here:
|
// There are two basic scenarios here:
|
||||||
// -------------------------------------
|
// -------------------------------------
|
||||||
// Case 1: triagular shape CFG:
|
// Case 1: triangular shape CFG (if-then):
|
||||||
// BB
|
// BB
|
||||||
// | \
|
// | \
|
||||||
// | \
|
// | \
|
||||||
|
@ -565,11 +564,13 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
|
||||||
// Succ
|
// Succ
|
||||||
// In this case, we are evaluating whether to select edge -> Succ, e.g.
|
// In this case, we are evaluating whether to select edge -> Succ, e.g.
|
||||||
// set Succ as the layout successor of BB. Picking Succ as BB's
|
// set Succ as the layout successor of BB. Picking Succ as BB's
|
||||||
// successor breaks the CFG constraints. With this layout, Pred BB
|
// successor breaks the CFG constraints (FIXME: define these constraints).
|
||||||
|
// With this layout, Pred BB
|
||||||
// is forced to be outlined, so the overall cost will be cost of the
|
// is forced to be outlined, so the overall cost will be cost of the
|
||||||
// branch taken from BB to Pred, plus the cost of back taken branch
|
// branch taken from BB to Pred, plus the cost of back taken branch
|
||||||
// from Pred to Succ, as well as the additional cost asssociated
|
// from Pred to Succ, as well as the additional cost associated
|
||||||
// with the needed unconditional jump instruction from Pred To Succ.
|
// with the needed unconditional jump instruction from Pred To Succ.
|
||||||
|
|
||||||
// The cost of the topological order layout is the taken branch cost
|
// The cost of the topological order layout is the taken branch cost
|
||||||
// from BB to Succ, so to make BB->Succ a viable candidate, the following
|
// from BB to Succ, so to make BB->Succ a viable candidate, the following
|
||||||
// must hold:
|
// must hold:
|
||||||
|
@ -579,12 +580,12 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
|
||||||
// freq(BB->Succ) > 2 * freq(BB->Pred), i.e.,
|
// freq(BB->Succ) > 2 * freq(BB->Pred), i.e.,
|
||||||
// prob(BB->Succ) > 2 * prob(BB->Pred)
|
// prob(BB->Succ) > 2 * prob(BB->Pred)
|
||||||
//
|
//
|
||||||
// When real profile data is available, we can precisely compute the the
|
// When real profile data is available, we can precisely compute the
|
||||||
// probabililty threshold that is needed for edge BB->Succ to be considered.
|
// probability threshold that is needed for edge BB->Succ to be considered.
|
||||||
// With out profile data, the heuristic requires the branch bias to be
|
// Without profile data, the heuristic requires the branch bias to be
|
||||||
// a lot larger to make sure the signal is very strong (e.g. 80% default).
|
// a lot larger to make sure the signal is very strong (e.g. 80% default).
|
||||||
// -----------------------------------------------------------------
|
// -----------------------------------------------------------------
|
||||||
// Case 2: diamond like CFG:
|
// Case 2: diamond like CFG (if-then-else):
|
||||||
// S
|
// S
|
||||||
// / \
|
// / \
|
||||||
// | \
|
// | \
|
||||||
|
@ -592,18 +593,38 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
|
||||||
// \ /
|
// \ /
|
||||||
// Succ
|
// Succ
|
||||||
// ..
|
// ..
|
||||||
// In this case, edge S->BB has already been selected, and we are evaluating
|
//
|
||||||
// candidate edge BB->Succ. Edge S->BB is selected because prob(S->BB)
|
// The current block is BB and edge BB->Succ is now being evaluated.
|
||||||
// is no less than prob(S->Pred). When real profile data is *available*, if
|
// Note that edge S->BB was previously already selected because
|
||||||
// the condition is true, it will be always better to continue the trace with
|
// prob(S->BB) > prob(S->Pred).
|
||||||
// edge BB->Succ instead of laying out with topological order (i.e. laying
|
// At this point, 2 blocks can be placed after BB: Pred or Succ. If we
|
||||||
// Pred first). The cost of S->BB->Succ is 2 * freq (S->Pred), while with
|
// choose Pred, we will have a topological ordering as shown on the left
|
||||||
// the topo order, the cost is freq(S-> Pred) + Pred(S->BB) which is larger.
|
// in the picture below. If we choose Succ, we have the solution as shown
|
||||||
|
// on the right:
|
||||||
|
//
|
||||||
|
// topo-order:
|
||||||
|
//
|
||||||
|
// S----- ---S
|
||||||
|
// | | | |
|
||||||
|
// ---BB | | BB
|
||||||
|
// | | | |
|
||||||
|
// | pred-- | Succ--
|
||||||
|
// | | | |
|
||||||
|
// ---succ ---pred--
|
||||||
|
//
|
||||||
|
// cost = freq(S->Pred) + freq(BB->Succ) cost = 2 * freq (S->Pred)
|
||||||
|
// = freq(S->Pred) + freq(S->BB)
|
||||||
|
//
|
||||||
|
// If we have profile data (i.e, branch probabilities can be trusted), the
|
||||||
|
// cost (number of taken branches) with layout S->BB->Succ->Pred is 2 *
|
||||||
|
// freq(S->Pred) while the cost of topo order is freq(S->Pred) + freq(S->BB).
|
||||||
|
// We know Prob(S->BB) > Prob(S->Pred), so freq(S->BB) > freq(S->Pred), which
|
||||||
|
// means the cost of topological order is greater.
|
||||||
// When profile data is not available, however, we need to be more
|
// When profile data is not available, however, we need to be more
|
||||||
// conservative. If the branch prediction is wrong, breaking the topo-order
|
// conservative. If the branch prediction is wrong, breaking the topo-order
|
||||||
// will actually yield a layout with large cost. For this reason, we need
|
// will actually yield a layout with large cost. For this reason, we need
|
||||||
// strong biaaed branch at block S with Prob(S->BB) in order to select
|
// strong biased branch at block S with Prob(S->BB) in order to select
|
||||||
// BB->Succ. This is equialant to looking the CFG backward with backward
|
// BB->Succ. This is equivalent to looking the CFG backward with backward
|
||||||
// edge: Prob(Succ->BB) needs to >= HotProb in order to be selected (without
|
// edge: Prob(Succ->BB) needs to >= HotProb in order to be selected (without
|
||||||
// profile data).
|
// profile data).
|
||||||
|
|
||||||
|
@ -634,7 +655,7 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
|
||||||
// BB Pred
|
// BB Pred
|
||||||
// \ /
|
// \ /
|
||||||
// Succ
|
// Succ
|
||||||
// We select edgee BB->Succ if
|
// We select edge BB->Succ if
|
||||||
// freq(BB->Succ) > freq(Succ) * HotProb
|
// freq(BB->Succ) > freq(Succ) * HotProb
|
||||||
// i.e. freq(BB->Succ) > freq(BB->Succ) * HotProb + freq(Pred->Succ) *
|
// i.e. freq(BB->Succ) > freq(BB->Succ) * HotProb + freq(Pred->Succ) *
|
||||||
// HotProb
|
// HotProb
|
||||||
|
@ -716,7 +737,7 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
|
||||||
/// profitable only really makes sense in the context of a loop. This returns
|
/// profitable only really makes sense in the context of a loop. This returns
|
||||||
/// the most frequently visited block in the worklist, which in the case of
|
/// the most frequently visited block in the worklist, which in the case of
|
||||||
/// a loop, is the one most desirable to be physically close to the rest of the
|
/// a loop, is the one most desirable to be physically close to the rest of the
|
||||||
/// loop body in order to improve icache behavior.
|
/// loop body in order to improve i-cache behavior.
|
||||||
///
|
///
|
||||||
/// \returns The best block found, or null if none are viable.
|
/// \returns The best block found, or null if none are viable.
|
||||||
MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
|
MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
|
||||||
|
@ -971,7 +992,7 @@ MachineBlockPlacement::findBestLoopExit(MachineLoop &L,
|
||||||
unsigned BestExitLoopDepth = 0;
|
unsigned BestExitLoopDepth = 0;
|
||||||
MachineBasicBlock *ExitingBB = nullptr;
|
MachineBasicBlock *ExitingBB = nullptr;
|
||||||
// If there are exits to outer loops, loop rotation can severely limit
|
// If there are exits to outer loops, loop rotation can severely limit
|
||||||
// fallthrough opportunites unless it selects such an exit. Keep a set of
|
// fallthrough opportunities unless it selects such an exit. Keep a set of
|
||||||
// blocks where rotating to exit with that block will reach an outer loop.
|
// blocks where rotating to exit with that block will reach an outer loop.
|
||||||
SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop;
|
SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop;
|
||||||
|
|
||||||
|
@ -1379,7 +1400,7 @@ void MachineBlockPlacement::buildLoopChains(MachineLoop &L) {
|
||||||
EHPadWorkList.clear();
|
EHPadWorkList.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
/// When OutlineOpitonalBranches is on, this method colects BBs that
|
/// When OutlineOpitonalBranches is on, this method collects BBs that
|
||||||
/// dominates all terminator blocks of the function \p F.
|
/// dominates all terminator blocks of the function \p F.
|
||||||
void MachineBlockPlacement::collectMustExecuteBBs() {
|
void MachineBlockPlacement::collectMustExecuteBBs() {
|
||||||
if (OutlineOptionalBranches) {
|
if (OutlineOptionalBranches) {
|
||||||
|
@ -1502,7 +1523,7 @@ void MachineBlockPlacement::buildCFGChains() {
|
||||||
MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
|
MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
|
||||||
|
|
||||||
// The "PrevBB" is not yet updated to reflect current code layout, so,
|
// The "PrevBB" is not yet updated to reflect current code layout, so,
|
||||||
// o. it may fall-through to a block without explict "goto" instruction
|
// o. it may fall-through to a block without explicit "goto" instruction
|
||||||
// before layout, and no longer fall-through it after layout; or
|
// before layout, and no longer fall-through it after layout; or
|
||||||
// o. just opposite.
|
// o. just opposite.
|
||||||
//
|
//
|
||||||
|
@ -1661,7 +1682,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
|
||||||
// Changing the layout can create new tail merging opportunities.
|
// Changing the layout can create new tail merging opportunities.
|
||||||
TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
|
TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
|
||||||
// TailMerge can create jump into if branches that make CFG irreducible for
|
// TailMerge can create jump into if branches that make CFG irreducible for
|
||||||
// HW that requires structurized CFG.
|
// HW that requires structured CFG.
|
||||||
bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() &&
|
bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() &&
|
||||||
PassConfig->getEnableTailMerge() &&
|
PassConfig->getEnableTailMerge() &&
|
||||||
BranchFoldPlacement;
|
BranchFoldPlacement;
|
||||||
|
|
Loading…
Reference in New Issue