forked from OSchip/llvm-project
CodeGen: BlockPlacement: Minor probability changes.
Qin may be large, and Succ may be more frequent than BB. Take these both into account when deciding if tail-duplication is profitable. llvm-svn: 299891
This commit is contained in:
parent
a12bd756e4
commit
ee51a20164
|
@ -761,57 +761,65 @@ bool MachineBlockPlacement::isProfitableToTailDup(
|
|||
// Cost in the first case is: P + V
|
||||
// For this calculation, we always assume P > Qout. If Qout > P
|
||||
// The result of this function will be ignored at the caller.
|
||||
// Cost in the second case is: Qout + Qin * U + P * V
|
||||
// Let F = SuccFreq - Qin
|
||||
// Cost in the second case is: Qout + min(Qin, F) * U + max(Qin, F) * V
|
||||
|
||||
if (PDom == nullptr || !Succ->isSuccessor(PDom)) {
|
||||
BranchProbability UProb = BestSuccSucc;
|
||||
BranchProbability VProb = AdjustedSuccSumProb - UProb;
|
||||
BlockFrequency F = SuccFreq - Qin;
|
||||
BlockFrequency V = SuccFreq * VProb;
|
||||
BlockFrequency QinU = Qin * UProb;
|
||||
BlockFrequency QinU = std::min(Qin, F) * UProb;
|
||||
BlockFrequency BaseCost = P + V;
|
||||
BlockFrequency DupCost = Qout + QinU + P * VProb;
|
||||
BlockFrequency DupCost = Qout + QinU + std::max(Qin, F) * VProb;
|
||||
return greaterWithBias(BaseCost, DupCost, EntryFreq);
|
||||
}
|
||||
BranchProbability UProb = MBPI->getEdgeProbability(Succ, PDom);
|
||||
BranchProbability VProb = AdjustedSuccSumProb - UProb;
|
||||
BlockFrequency U = SuccFreq * UProb;
|
||||
BlockFrequency V = SuccFreq * VProb;
|
||||
BlockFrequency F = SuccFreq - Qin;
|
||||
// If there is a post-dominating successor, here is the calculation:
|
||||
// BB BB BB BB
|
||||
// | \Qout | \ | \Qout | \
|
||||
// |P C | = |P C | =
|
||||
// = C' |P C = C' |P C
|
||||
// | /Qin | | | /Qin | |
|
||||
// | / | C' (+Succ) | / | C' (+Succ)
|
||||
// Succ Succ /| Succ Succ /|
|
||||
// | \ V | \/ | | \ V | \/ |
|
||||
// |U \ |U /\ | |U = |U /\ |
|
||||
// = D = = \= | D | = =|
|
||||
// | / |/ D | / |/ D
|
||||
// | / | / | = | /
|
||||
// |/ | / |/ | =
|
||||
// Dom Dom Dom Dom
|
||||
// | \Qout | \ | \Qout | \
|
||||
// |P C | = |P C | =
|
||||
// = C' |P C = C' |P C
|
||||
// | /Qin | | | /Qin | |
|
||||
// | / | C' (+Succ) | / | C' (+Succ)
|
||||
// Succ Succ /| Succ Succ /|
|
||||
// | \ V | \/ | | \ V | \/ |
|
||||
// |U \ |U /\ =? |U = |U /\ |
|
||||
// = D = = =?| | D | = =|
|
||||
// | / |/ D | / |/ D
|
||||
// | / | / | = | /
|
||||
// |/ | / |/ | =
|
||||
// Dom Dom Dom Dom
|
||||
// '=' : Branch taken for that CFG edge
|
||||
// The cost for taken branches in the first case is P + U
|
||||
// Let F = SuccFreq - Qin
|
||||
// The cost in the second case (assuming independence), given the layout:
|
||||
// BB, Succ, (C+Succ), D, Dom
|
||||
// is Qout + P * V + Qin * U
|
||||
// BB, Succ, (C+Succ), D, Dom or the layout:
|
||||
// BB, Succ, D, Dom, (C+Succ)
|
||||
// is Qout + max(F, Qin) * U + min(F, Qin)
|
||||
// compare P + U vs Qout + P * U + Qin.
|
||||
//
|
||||
// The 3rd and 4th cases cover when Dom would be chosen to follow Succ.
|
||||
//
|
||||
// For the 3rd case, the cost is P + 2 * V
|
||||
// For the 4th case, the cost is Qout + Qin * U + P * V + V
|
||||
// We choose 4 over 3 when (P + V) > Qout + Qin * U + P * V
|
||||
// For the 4th case, the cost is Qout + min(Qin, F) * U + max(Qin, F) * V + V
|
||||
// We choose 4 over 3 when (P + V) > Qout + min(Qin, F) * U + max(Qin, F) * V
|
||||
if (UProb > AdjustedSuccSumProb / 2 &&
|
||||
!hasBetterLayoutPredecessor(Succ, PDom, *BlockToChain[PDom], UProb, UProb,
|
||||
Chain, BlockFilter))
|
||||
// Cases 3 & 4
|
||||
return greaterWithBias((P + V), (Qout + Qin * UProb + P * VProb),
|
||||
EntryFreq);
|
||||
return greaterWithBias(
|
||||
(P + V), (Qout + std::max(Qin, F) * VProb + std::min(Qin, F) * UProb),
|
||||
EntryFreq);
|
||||
// Cases 1 & 2
|
||||
return greaterWithBias(
|
||||
(P + U), (Qout + Qin * AdjustedSuccSumProb + P * UProb), EntryFreq);
|
||||
return greaterWithBias((P + U),
|
||||
(Qout + std::min(Qin, F) * AdjustedSuccSumProb +
|
||||
std::max(Qin, F) * UProb),
|
||||
EntryFreq);
|
||||
}
|
||||
|
||||
/// Check for a trellis layout. \p BB is the upper part of a trellis if its
|
||||
|
|
|
@ -264,7 +264,7 @@ declare void @do_something() #1
|
|||
define i32 @do_nothing_if_resultant_opcodes_would_differ() #0 {
|
||||
; CHECK-LABEL: do_nothing_if_resultant_opcodes_would_differ
|
||||
; CHECK: cmn
|
||||
; CHECK: b.le
|
||||
; CHECK: b.gt
|
||||
; CHECK: cmp
|
||||
; CHECK: b.gt
|
||||
entry:
|
||||
|
|
|
@ -1454,9 +1454,50 @@ exit:
|
|||
ret void
|
||||
}
|
||||
|
||||
; Because %endif has a higher frequency than %if, the calculations show we
|
||||
; shouldn't tail-duplicate %endif so that we can place it after %if. We were
|
||||
; previously undercounting the cost by ignoring execution frequency that didn't
|
||||
; come from the %if->%endif path.
|
||||
; CHECK-LABEL: higher_frequency_succ_tail_dup
|
||||
; CHECK: %entry
|
||||
; CHECK: %elseif
|
||||
; CHECK: %else
|
||||
; CHECK: %endif
|
||||
; CHECK: %then
|
||||
; CHECK: %ret
|
||||
define void @higher_frequency_succ_tail_dup(i1 %a, i1 %b, i1 %c) {
|
||||
entry:
|
||||
br label %if
|
||||
if: ; preds = %entry
|
||||
call void @effect(i32 0)
|
||||
br i1 %a, label %elseif, label %endif, !prof !11 ; even
|
||||
|
||||
elseif: ; preds = %if
|
||||
call void @effect(i32 1)
|
||||
br i1 %b, label %else, label %endif, !prof !11 ; even
|
||||
|
||||
else: ; preds = %elseif
|
||||
call void @effect(i32 2)
|
||||
br label %endif
|
||||
|
||||
endif: ; preds = %if, %elseif, %else
|
||||
br i1 %c, label %then, label %ret, !prof !12 ; 5 to 3
|
||||
|
||||
then: ; preds = %endif
|
||||
call void @effect(i32 3)
|
||||
br label %ret
|
||||
|
||||
ret: ; preds = %endif, %then
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @effect(i32)
|
||||
|
||||
!5 = !{!"branch_weights", i32 84, i32 16}
|
||||
!6 = !{!"function_entry_count", i32 10}
|
||||
!7 = !{!"branch_weights", i32 60, i32 40}
|
||||
!8 = !{!"branch_weights", i32 5001, i32 4999}
|
||||
!9 = !{!"branch_weights", i32 85, i32 15}
|
||||
!10 = !{!"branch_weights", i32 90, i32 10}
|
||||
!11 = !{!"branch_weights", i32 1, i32 1}
|
||||
!12 = !{!"branch_weights", i32 5, i32 3}
|
||||
|
|
Loading…
Reference in New Issue