forked from OSchip/llvm-project
Codegen: MachineBlockPlacement Improve probability layout.
The following pattern was being layed out poorly: A / \ B C / \ / \ D E ? (Doesn't matter) Where A->B is far more likely than A->C, and prob(B->D) = prob(B->E) The current algorithm gives: A,B,C,E (D goes on worklist) It does this even if C has a frequency count of 0. This patch adjusts the layout calculation so that if freq(B->E) >> freq(C->E) then we go ahead and layout E rather than C. Fallthrough half the time is better than fallthrough never, or fallthrough very rarely. The resulting layout is: A,B,E, (C and D are in a worklist) llvm-svn: 277187
This commit is contained in:
parent
af324f76ad
commit
02d8d054ab
|
@ -631,18 +631,46 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
|
|||
// BB->Succ. This is equivalent to looking the CFG backward with backward
|
||||
// edge: Prob(Succ->BB) needs to >= HotProb in order to be selected (without
|
||||
// profile data).
|
||||
|
||||
// --------------------------------------------------------------------------
|
||||
// Case 3: forked diamond
|
||||
// S
|
||||
// / \
|
||||
// / \
|
||||
// BB Pred
|
||||
// | \ / |
|
||||
// | \ / |
|
||||
// | X |
|
||||
// | / \ |
|
||||
// | / \ |
|
||||
// S1 S2
|
||||
//
|
||||
// The current block is BB and edge BB->S1 is now being evaluated.
|
||||
// As above S->BB was already selected because
|
||||
// prob(S->BB) > prob(S->Pred). Assume that prob(BB->S1) >= prob(BB->S2).
|
||||
//
|
||||
// topo-order:
|
||||
//
|
||||
// S-------| ---S
|
||||
// | | | |
|
||||
// ---BB | | BB
|
||||
// | | | |
|
||||
// | Pred----| | S1----
|
||||
// | | | |
|
||||
// --(S1 or S2) ---Pred--
|
||||
//
|
||||
// topo-cost = freq(S->Pred) + freq(BB->S1) + freq(BB->S2)
|
||||
// + min(freq(Pred->S1), freq(Pred->S2))
|
||||
// Non-topo-order cost:
|
||||
// In the worst case, S2 will not get laid out after Pred.
|
||||
// non-topo-cost = 2 * freq(S->Pred) + freq(BB->S2).
|
||||
// To be conservative, we can assume that min(freq(Pred->S1), freq(Pred->S2))
|
||||
// is 0. Then the non topo layout is better when
|
||||
// freq(S->Pred) < freq(BB->S1).
|
||||
// This is exactly what is checked below.
|
||||
// Note there are other shapes that apply (Pred may not be a single block,
|
||||
// but they all fit this general pattern.)
|
||||
BranchProbability HotProb = getLayoutSuccessorProbThreshold(BB);
|
||||
|
||||
// Forward checking. For case 2, SuccProb will be 1.
|
||||
if (SuccProb < HotProb) {
|
||||
DEBUG(dbgs() << " Not a candidate: " << getBlockName(Succ) << " "
|
||||
<< "Respecting topological ordering because "
|
||||
<< "probability is less than prob treshold: "
|
||||
<< SuccProb << "\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
// Make sure that a hot successor doesn't have a globally more
|
||||
// important predecessor.
|
||||
BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb;
|
||||
|
@ -653,11 +681,11 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
|
|||
(BlockFilter && !BlockFilter->count(Pred)) ||
|
||||
BlockToChain[Pred] == &Chain)
|
||||
continue;
|
||||
// Do backward checking. For case 1, it is actually redundant check. For
|
||||
// case 2 above, we need a backward checking to filter out edges that are
|
||||
// not 'strongly' biased. With profile data available, the check is mostly
|
||||
// redundant too (when threshold prob is set at 50%) unless S has more than
|
||||
// two successors.
|
||||
// Do backward checking.
|
||||
// For all cases above, we need a backward checking to filter out edges that
|
||||
// are not 'strongly' biased. With profile data available, the check is
|
||||
// mostly redundant for case 2 (when threshold prob is set at 50%) unless S
|
||||
// has more than two successors.
|
||||
// BB Pred
|
||||
// \ /
|
||||
// Succ
|
||||
|
@ -666,6 +694,8 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
|
|||
// i.e. freq(BB->Succ) > freq(BB->Succ) * HotProb + freq(Pred->Succ) *
|
||||
// HotProb
|
||||
// i.e. freq((BB->Succ) * (1 - HotProb) > freq(Pred->Succ) * HotProb
|
||||
// Case 1 is covered too, because the first equation reduces to:
|
||||
// prob(BB->Succ) > HotProb. (freq(Succ) = freq(BB) for a triangle)
|
||||
BlockFrequency PredEdgeFreq =
|
||||
MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, Succ);
|
||||
if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.getCompl()) {
|
||||
|
|
|
@ -1283,6 +1283,174 @@ exit:
|
|||
ret void
|
||||
}
|
||||
|
||||
declare void @a()
|
||||
declare void @b()
|
||||
|
||||
define void @test_forked_hot_diamond(i32* %a) {
|
||||
; Test that a hot-branch with probability > 80% followed by a 50/50 branch
|
||||
; will not place the cold predecessor if the probability for the fallthrough
|
||||
; remains above 80%
|
||||
; CHECK-LABEL: test_forked_hot_diamond
|
||||
; CHECK: %entry
|
||||
; CHECK: %then
|
||||
; CHECK: %fork1
|
||||
; CHECK: %else
|
||||
; CHECK: %fork2
|
||||
; CHECK: %exit
|
||||
entry:
|
||||
%gep1 = getelementptr i32, i32* %a, i32 1
|
||||
%val1 = load i32, i32* %gep1
|
||||
%cond1 = icmp ugt i32 %val1, 1
|
||||
br i1 %cond1, label %then, label %else, !prof !5
|
||||
|
||||
then:
|
||||
call void @hot_function()
|
||||
%gep2 = getelementptr i32, i32* %a, i32 2
|
||||
%val2 = load i32, i32* %gep2
|
||||
%cond2 = icmp ugt i32 %val2, 2
|
||||
br i1 %cond2, label %fork1, label %fork2, !prof !8
|
||||
|
||||
else:
|
||||
call void @cold_function()
|
||||
%gep3 = getelementptr i32, i32* %a, i32 3
|
||||
%val3 = load i32, i32* %gep3
|
||||
%cond3 = icmp ugt i32 %val3, 3
|
||||
br i1 %cond3, label %fork1, label %fork2, !prof !8
|
||||
|
||||
fork1:
|
||||
call void @a()
|
||||
br label %exit
|
||||
|
||||
fork2:
|
||||
call void @b()
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
call void @hot_function()
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test_forked_hot_diamond_gets_cold(i32* %a) {
|
||||
; Test that a hot-branch with probability > 80% followed by a 50/50 branch
|
||||
; will place the cold predecessor if the probability for the fallthrough
|
||||
; falls below 80%
|
||||
; The probability for both branches is 85%. For then2 vs else1
|
||||
; this results in a compounded probability of 83%.
|
||||
; Neither then2->fork1 nor then2->fork2 has a large enough relative
|
||||
; probability to break the CFG.
|
||||
; Relative probs:
|
||||
; then2 -> fork1 vs else1 -> fork1 = 71%
|
||||
; then2 -> fork2 vs else2 -> fork2 = 74%
|
||||
; CHECK-LABEL: test_forked_hot_diamond_gets_cold
|
||||
; CHECK: %entry
|
||||
; CHECK: %then1
|
||||
; CHECK: %then2
|
||||
; CHECK: %else1
|
||||
; CHECK: %fork1
|
||||
; CHECK: %else2
|
||||
; CHECK: %fork2
|
||||
; CHECK: %exit
|
||||
entry:
|
||||
%gep1 = getelementptr i32, i32* %a, i32 1
|
||||
%val1 = load i32, i32* %gep1
|
||||
%cond1 = icmp ugt i32 %val1, 1
|
||||
br i1 %cond1, label %then1, label %else1, !prof !9
|
||||
|
||||
then1:
|
||||
call void @hot_function()
|
||||
%gep2 = getelementptr i32, i32* %a, i32 2
|
||||
%val2 = load i32, i32* %gep2
|
||||
%cond2 = icmp ugt i32 %val2, 2
|
||||
br i1 %cond2, label %then2, label %else2, !prof !9
|
||||
|
||||
else1:
|
||||
call void @cold_function()
|
||||
br label %fork1
|
||||
|
||||
then2:
|
||||
call void @hot_function()
|
||||
%gep3 = getelementptr i32, i32* %a, i32 3
|
||||
%val3 = load i32, i32* %gep2
|
||||
%cond3 = icmp ugt i32 %val2, 3
|
||||
br i1 %cond3, label %fork1, label %fork2, !prof !8
|
||||
|
||||
else2:
|
||||
call void @cold_function()
|
||||
br label %fork2
|
||||
|
||||
fork1:
|
||||
call void @a()
|
||||
br label %exit
|
||||
|
||||
fork2:
|
||||
call void @b()
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
call void @hot_function()
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test_forked_hot_diamond_stays_hot(i32* %a) {
|
||||
; Test that a hot-branch with probability > 88.88% (1:8) followed by a 50/50
|
||||
; branch will not place the cold predecessor as the probability for the
|
||||
; fallthrough stays above 80%
|
||||
; (1:8) followed by (1:1) is still (1:4)
|
||||
; Here we use 90% probability because two in a row
|
||||
; have a 89 % probability vs the original branch.
|
||||
; CHECK-LABEL: test_forked_hot_diamond_stays_hot
|
||||
; CHECK: %entry
|
||||
; CHECK: %then1
|
||||
; CHECK: %then2
|
||||
; CHECK: %fork1
|
||||
; CHECK: %else1
|
||||
; CHECK: %else2
|
||||
; CHECK: %fork2
|
||||
; CHECK: %exit
|
||||
entry:
|
||||
%gep1 = getelementptr i32, i32* %a, i32 1
|
||||
%val1 = load i32, i32* %gep1
|
||||
%cond1 = icmp ugt i32 %val1, 1
|
||||
br i1 %cond1, label %then1, label %else1, !prof !10
|
||||
|
||||
then1:
|
||||
call void @hot_function()
|
||||
%gep2 = getelementptr i32, i32* %a, i32 2
|
||||
%val2 = load i32, i32* %gep2
|
||||
%cond2 = icmp ugt i32 %val2, 2
|
||||
br i1 %cond2, label %then2, label %else2, !prof !10
|
||||
|
||||
else1:
|
||||
call void @cold_function()
|
||||
br label %fork1
|
||||
|
||||
then2:
|
||||
call void @hot_function()
|
||||
%gep3 = getelementptr i32, i32* %a, i32 3
|
||||
%val3 = load i32, i32* %gep2
|
||||
%cond3 = icmp ugt i32 %val2, 3
|
||||
br i1 %cond3, label %fork1, label %fork2, !prof !8
|
||||
|
||||
else2:
|
||||
call void @cold_function()
|
||||
br label %fork2
|
||||
|
||||
fork1:
|
||||
call void @a()
|
||||
br label %exit
|
||||
|
||||
fork2:
|
||||
call void @b()
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
call void @hot_function()
|
||||
ret void
|
||||
}
|
||||
|
||||
!5 = !{!"branch_weights", i32 84, i32 16}
|
||||
!6 = !{!"function_entry_count", i32 10}
|
||||
!7 = !{!"branch_weights", i32 60, i32 40}
|
||||
!8 = !{!"branch_weights", i32 5001, i32 4999}
|
||||
!9 = !{!"branch_weights", i32 85, i32 15}
|
||||
!10 = !{!"branch_weights", i32 90, i32 10}
|
||||
|
|
Loading…
Reference in New Issue