Follow-up patch of http://reviews.llvm.org/D19948 to handle missing profiles when simplifying CFG.

Summary: When simplifying the CFG, use a default branch weight of 1:1 for a branch whose profile is missing, as long as the other branch has profile data.

Reviewers: spatel, davidxl

Subscribers: danielcdh, llvm-commits

Differential Revision: http://reviews.llvm.org/D20307

llvm-svn: 269995
This commit is contained in:
Dehao Chen 2016-05-18 22:41:03 +00:00
parent c01919e796
commit f16376b505
2 changed files with 70 additions and 55 deletions

View File

@ -2094,6 +2094,29 @@ static bool checkCSEInPredecessor(Instruction *Inst, BasicBlock *PB) {
return false; return false;
} }
/// Read the profile branch weights of the predecessor branch \p PBI and the
/// successor branch \p BI into {Pred|Succ}{True|False}Weight.
///
/// Returns false if neither branch has weights available. Otherwise returns
/// true; a branch that has no weights of its own is assigned 1:1 so callers
/// can still combine the two sets of weights.
static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI,
                                   uint64_t &PredTrueWeight,
                                   uint64_t &PredFalseWeight,
                                   uint64_t &SuccTrueWeight,
                                   uint64_t &SuccFalseWeight) {
  // Query both branches unconditionally so each pair of out-parameters is
  // populated whenever its branch has weights.
  const bool PredKnown =
      PBI->extractProfMetadata(PredTrueWeight, PredFalseWeight);
  const bool SuccKnown =
      BI->extractProfMetadata(SuccTrueWeight, SuccFalseWeight);

  // Nothing to work with: neither branch has weights.
  if (!PredKnown && !SuccKnown)
    return false;

  // Exactly one side may be missing here; default it to an even 1:1 split.
  if (!PredKnown) {
    PredFalseWeight = 1;
    PredTrueWeight = 1;
  }
  if (!SuccKnown) {
    SuccFalseWeight = 1;
    SuccTrueWeight = 1;
  }
  return true;
}
/// If this basic block is simple enough, and if a predecessor branches to us /// If this basic block is simple enough, and if a predecessor branches to us
/// and one of our successors, fold the block into the predecessor and use /// and one of our successors, fold the block into the predecessor and use
/// logical operations to pick the right destination. /// logical operations to pick the right destination.
@ -2281,14 +2304,13 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
PBI->setCondition(NewCond); PBI->setCondition(NewCond);
uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight; uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
bool PredHasWeights = bool HasWeights =
PBI->extractProfMetadata(PredTrueWeight, PredFalseWeight); extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
bool SuccHasWeights = SuccTrueWeight, SuccFalseWeight);
BI->extractProfMetadata(SuccTrueWeight, SuccFalseWeight);
SmallVector<uint64_t, 8> NewWeights; SmallVector<uint64_t, 8> NewWeights;
if (PBI->getSuccessor(0) == BB) { if (PBI->getSuccessor(0) == BB) {
if (PredHasWeights && SuccHasWeights) { if (HasWeights) {
// PBI: br i1 %x, BB, FalseDest // PBI: br i1 %x, BB, FalseDest
// BI: br i1 %y, TrueDest, FalseDest // BI: br i1 %y, TrueDest, FalseDest
// TrueWeight is TrueWeight for PBI * TrueWeight for BI. // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
@ -2305,7 +2327,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
PBI->setSuccessor(0, TrueDest); PBI->setSuccessor(0, TrueDest);
} }
if (PBI->getSuccessor(1) == BB) { if (PBI->getSuccessor(1) == BB) {
if (PredHasWeights && SuccHasWeights) { if (HasWeights) {
// PBI: br i1 %x, TrueDest, BB // PBI: br i1 %x, TrueDest, BB
// BI: br i1 %y, TrueDest, FalseDest // BI: br i1 %y, TrueDest, FalseDest
// TrueWeight is TrueWeight for PBI * TotalWeight for BI + // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
@ -2840,18 +2862,10 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
// Update branch weight for PBI. // Update branch weight for PBI.
uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight; uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
uint64_t PredCommon, PredOther, SuccCommon, SuccOther; uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
bool PredHasWeights = bool HasWeights =
PBI->extractProfMetadata(PredTrueWeight, PredFalseWeight); extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
bool SuccHasWeights = SuccTrueWeight, SuccFalseWeight);
BI->extractProfMetadata(SuccTrueWeight, SuccFalseWeight);
bool HasWeights = PredHasWeights || SuccHasWeights;
if (HasWeights) { if (HasWeights) {
if (!PredHasWeights) {
PredFalseWeight = PredTrueWeight = 1;
}
if (!SuccHasWeights) {
SuccFalseWeight = SuccTrueWeight = 1;
}
PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight; PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
PredOther = PBIOp ? PredTrueWeight : PredFalseWeight; PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight; SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
@ -2893,7 +2907,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
// weights for PBI do not apply to the new select because the select's // weights for PBI do not apply to the new select because the select's
// 'logical' edges are incoming edges of the phi that is eliminated, not // 'logical' edges are incoming edges of the phi that is eliminated, not
// the outgoing edges of PBI. // the outgoing edges of PBI.
if (PredHasWeights && SuccHasWeights) { if (HasWeights) {
uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight; uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
uint64_t PredOther = PBIOp ? PredTrueWeight : PredFalseWeight; uint64_t PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight; uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;

View File

@ -29,8 +29,7 @@ define void @fake_weights(i1 %a, i1 %b) {
entry: entry:
br i1 %a, label %Y, label %X, !prof !12 br i1 %a, label %Y, label %X, !prof !12
; CHECK: %or.cond = and i1 %a.not, %c ; CHECK: %or.cond = and i1 %a.not, %c
; CHECK-NEXT: br i1 %or.cond, label %Z, label %Y ; CHECK-NEXT: br i1 %or.cond, label %Z, label %Y, !prof !1
; CHECK-NOT: !prof !0
; CHECK: Y: ; CHECK: Y:
X: X:
%c = or i1 %b, false %c = or i1 %b, false
@ -49,7 +48,7 @@ define void @test2(i1 %a, i1 %b) {
; CHECK-LABEL: @test2( ; CHECK-LABEL: @test2(
entry: entry:
br i1 %a, label %X, label %Y, !prof !1 br i1 %a, label %X, label %Y, !prof !1
; CHECK: br i1 %or.cond, label %Z, label %Y, !prof !1 ; CHECK: br i1 %or.cond, label %Z, label %Y, !prof !2
; CHECK-NOT: !prof ; CHECK-NOT: !prof
X: X:
@ -67,7 +66,7 @@ Z:
define void @test3(i1 %a, i1 %b) { define void @test3(i1 %a, i1 %b) {
; CHECK-LABEL: @test3( ; CHECK-LABEL: @test3(
; CHECK-NOT: !prof ; CHECK: br i1 %or.cond, label %Z, label %Y, !prof !1
entry: entry:
br i1 %a, label %X, label %Y, !prof !1 br i1 %a, label %X, label %Y, !prof !1
@ -86,7 +85,7 @@ Z:
define void @test4(i1 %a, i1 %b) { define void @test4(i1 %a, i1 %b) {
; CHECK-LABEL: @test4( ; CHECK-LABEL: @test4(
; CHECK-NOT: !prof ; CHECK: br i1 %or.cond, label %Z, label %Y, !prof !1
entry: entry:
br i1 %a, label %X, label %Y br i1 %a, label %X, label %Y
@ -115,7 +114,7 @@ entry:
; CHECK: switch i32 %N, label %sw2 [ ; CHECK: switch i32 %N, label %sw2 [
; CHECK: i32 3, label %sw.bb1 ; CHECK: i32 3, label %sw.bb1
; CHECK: i32 2, label %sw.bb ; CHECK: i32 2, label %sw.bb
; CHECK: ], !prof !2 ; CHECK: ], !prof !3
sw.bb: sw.bb:
call void @helper(i32 0) call void @helper(i32 0)
@ -148,7 +147,7 @@ entry:
; CHECK: i32 3, label %sw.bb1 ; CHECK: i32 3, label %sw.bb1
; CHECK: i32 2, label %sw.bb ; CHECK: i32 2, label %sw.bb
; CHECK: i32 4, label %sw.bb5 ; CHECK: i32 4, label %sw.bb5
; CHECK: ], !prof !3 ; CHECK: ], !prof !4
sw.bb: sw.bb:
call void @helper(i32 0) call void @helper(i32 0)
@ -183,7 +182,7 @@ define void @test1_swap(i1 %a, i1 %b) {
; CHECK-LABEL: @test1_swap( ; CHECK-LABEL: @test1_swap(
entry: entry:
br i1 %a, label %Y, label %X, !prof !0 br i1 %a, label %Y, label %X, !prof !0
; CHECK: br i1 %or.cond, label %Y, label %Z, !prof !4 ; CHECK: br i1 %or.cond, label %Y, label %Z, !prof !5
X: X:
%c = or i1 %b, false %c = or i1 %b, false
@ -203,7 +202,7 @@ define void @test7(i1 %a, i1 %b) {
entry: entry:
%c = or i1 %b, false %c = or i1 %b, false
br i1 %a, label %Y, label %X, !prof !0 br i1 %a, label %Y, label %X, !prof !0
; CHECK: br i1 %brmerge, label %Y, label %Z, !prof !5 ; CHECK: br i1 %brmerge, label %Y, label %Z, !prof !6
X: X:
br i1 %c, label %Y, label %Z, !prof !6 br i1 %c, label %Y, label %Z, !prof !6
@ -222,7 +221,7 @@ define void @test8(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: @test8( ; CHECK-LABEL: @test8(
entry: entry:
%lt = icmp slt i64 %x, %y %lt = icmp slt i64 %x, %y
; CHECK: br i1 %lt, label %a, label %b, !prof !6 ; CHECK: br i1 %lt, label %a, label %b, !prof !7
%qux = select i1 %lt, i32 0, i32 2 %qux = select i1 %lt, i32 0, i32 2
switch i32 %qux, label %bees [ switch i32 %qux, label %bees [
i32 0, label %a i32 0, label %a
@ -255,7 +254,7 @@ entry:
; CHECK: i32 1, label %end ; CHECK: i32 1, label %end
; CHECK: i32 2, label %end ; CHECK: i32 2, label %end
; CHECK: i32 92, label %end ; CHECK: i32 92, label %end
; CHECK: ], !prof !7 ; CHECK: ], !prof !8
a: a:
call void @helper(i32 0) nounwind call void @helper(i32 0) nounwind
@ -293,7 +292,7 @@ lor.end:
; CHECK-LABEL: @test10( ; CHECK-LABEL: @test10(
; CHECK: %x.off = add i32 %x, -1 ; CHECK: %x.off = add i32 %x, -1
; CHECK: %switch = icmp ult i32 %x.off, 3 ; CHECK: %switch = icmp ult i32 %x.off, 3
; CHECK: br i1 %switch, label %lor.end, label %lor.rhs, !prof !8 ; CHECK: br i1 %switch, label %lor.end, label %lor.rhs, !prof !9
} }
; Remove dead cases from the switch. ; Remove dead cases from the switch.
@ -305,7 +304,7 @@ define void @test11(i32 %x) nounwind {
], !prof !8 ], !prof !8
; CHECK-LABEL: @test11( ; CHECK-LABEL: @test11(
; CHECK: %cond = icmp eq i32 %i, 24 ; CHECK: %cond = icmp eq i32 %i, 24
; CHECK: br i1 %cond, label %c, label %a, !prof !9 ; CHECK: br i1 %cond, label %c, label %a, !prof !10
a: a:
call void @helper(i32 0) nounwind call void @helper(i32 0) nounwind
@ -368,7 +367,7 @@ c:
@max_regno = common global i32 0, align 4 @max_regno = common global i32 0, align 4
define void @test14(i32* %old, i32 %final) { define void @test14(i32* %old, i32 %final) {
; CHECK-LABEL: @test14 ; CHECK-LABEL: @test14
; CHECK: br i1 %or.cond, label %for.exit, label %for.inc, !prof !10 ; CHECK: br i1 %or.cond, label %for.exit, label %for.inc, !prof !11
for.cond: for.cond:
br label %for.cond2 br label %for.cond2
for.cond2: for.cond2:
@ -394,7 +393,7 @@ define i32 @HoistThenElseCodeToIf(i32 %n) {
; CHECK-LABEL: @HoistThenElseCodeToIf( ; CHECK-LABEL: @HoistThenElseCodeToIf(
; CHECK-NEXT: entry: ; CHECK-NEXT: entry:
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 %n, 0 ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 %n, 0
; CHECK-NEXT: [[DOT:%.*]] = select i1 [[TOBOOL]], i32 1, i32 234, !prof !11 ; CHECK-NEXT: [[DOT:%.*]] = select i1 [[TOBOOL]], i32 1, i32 234, !prof !12
; CHECK-NEXT: ret i32 [[DOT]] ; CHECK-NEXT: ret i32 [[DOT]]
; ;
entry: entry:
@ -418,8 +417,8 @@ define i32 @SimplifyCondBranchToCondBranch(i1 %cmpa, i1 %cmpb) {
; CHECK-LABEL: @SimplifyCondBranchToCondBranch( ; CHECK-LABEL: @SimplifyCondBranchToCondBranch(
; CHECK-NEXT: block1: ; CHECK-NEXT: block1:
; CHECK-NEXT: [[BRMERGE:%.*]] = or i1 %cmpa, %cmpb ; CHECK-NEXT: [[BRMERGE:%.*]] = or i1 %cmpa, %cmpb
; CHECK-NEXT: [[DOTMUX:%.*]] = select i1 %cmpa, i32 0, i32 2, !prof !12 ; CHECK-NEXT: [[DOTMUX:%.*]] = select i1 %cmpa, i32 0, i32 2, !prof !13
; CHECK-NEXT: [[OUTVAL:%.*]] = select i1 [[BRMERGE]], i32 [[DOTMUX]], i32 1, !prof !13 ; CHECK-NEXT: [[OUTVAL:%.*]] = select i1 [[BRMERGE]], i32 [[DOTMUX]], i32 1, !prof !14
; CHECK-NEXT: ret i32 [[OUTVAL]] ; CHECK-NEXT: ret i32 [[OUTVAL]]
; ;
block1: block1:
@ -445,8 +444,8 @@ define i32 @SimplifyCondBranchToCondBranchSwap(i1 %cmpa, i1 %cmpb) {
; CHECK-NEXT: [[CMPA_NOT:%.*]] = xor i1 %cmpa, true ; CHECK-NEXT: [[CMPA_NOT:%.*]] = xor i1 %cmpa, true
; CHECK-NEXT: [[CMPB_NOT:%.*]] = xor i1 %cmpb, true ; CHECK-NEXT: [[CMPB_NOT:%.*]] = xor i1 %cmpb, true
; CHECK-NEXT: [[BRMERGE:%.*]] = or i1 [[CMPA_NOT]], [[CMPB_NOT]] ; CHECK-NEXT: [[BRMERGE:%.*]] = or i1 [[CMPA_NOT]], [[CMPB_NOT]]
; CHECK-NEXT: [[DOTMUX:%.*]] = select i1 [[CMPA_NOT]], i32 0, i32 2, !prof !14 ; CHECK-NEXT: [[DOTMUX:%.*]] = select i1 [[CMPA_NOT]], i32 0, i32 2, !prof !15
; CHECK-NEXT: [[OUTVAL:%.*]] = select i1 [[BRMERGE]], i32 [[DOTMUX]], i32 1, !prof !15 ; CHECK-NEXT: [[OUTVAL:%.*]] = select i1 [[BRMERGE]], i32 [[DOTMUX]], i32 1, !prof !16
; CHECK-NEXT: ret i32 [[OUTVAL]] ; CHECK-NEXT: ret i32 [[OUTVAL]]
; ;
block1: block1:
@ -470,8 +469,8 @@ define i32 @SimplifyCondBranchToCondBranchSwapMissingWeight(i1 %cmpa, i1 %cmpb)
; CHECK-NEXT: [[CMPA_NOT:%.*]] = xor i1 %cmpa, true ; CHECK-NEXT: [[CMPA_NOT:%.*]] = xor i1 %cmpa, true
; CHECK-NEXT: [[CMPB_NOT:%.*]] = xor i1 %cmpb, true ; CHECK-NEXT: [[CMPB_NOT:%.*]] = xor i1 %cmpb, true
; CHECK-NEXT: [[BRMERGE:%.*]] = or i1 [[CMPA_NOT]], [[CMPB_NOT]] ; CHECK-NEXT: [[BRMERGE:%.*]] = or i1 [[CMPA_NOT]], [[CMPB_NOT]]
; CHECK-NEXT: [[DOTMUX:%.*]] = select i1 [[CMPA_NOT]], i32 0, i32 2 ; CHECK-NEXT: [[DOTMUX:%.*]] = select i1 [[CMPA_NOT]], i32 0, i32 2, !prof !17
; CHECK-NEXT: [[OUTVAL:%.*]] = select i1 [[BRMERGE]], i32 [[DOTMUX]], i32 1, !prof !16 ; CHECK-NEXT: [[OUTVAL:%.*]] = select i1 [[BRMERGE]], i32 [[DOTMUX]], i32 1, !prof !18
; CHECK-NEXT: ret i32 [[OUTVAL]] ; CHECK-NEXT: ret i32 [[OUTVAL]]
; ;
block1: block1:
@ -506,21 +505,23 @@ exit:
!14 = !{!"branch_weights", i32 4, i32 7} !14 = !{!"branch_weights", i32 4, i32 7}
; CHECK: !0 = !{!"branch_weights", i32 5, i32 11} ; CHECK: !0 = !{!"branch_weights", i32 5, i32 11}
; CHECK: !1 = !{!"branch_weights", i32 1, i32 5} ; CHECK: !1 = !{!"branch_weights", i32 1, i32 3}
; CHECK: !2 = !{!"branch_weights", i32 7, i32 1, i32 2} ; CHECK: !2 = !{!"branch_weights", i32 1, i32 5}
; CHECK: !3 = !{!"branch_weights", i32 49, i32 12, i32 24, i32 35} ; CHECK: !3 = !{!"branch_weights", i32 7, i32 1, i32 2}
; CHECK: !4 = !{!"branch_weights", i32 11, i32 5} ; CHECK: !4 = !{!"branch_weights", i32 49, i32 12, i32 24, i32 35}
; CHECK: !5 = !{!"branch_weights", i32 17, i32 15} ; CHECK: !5 = !{!"branch_weights", i32 11, i32 5}
; CHECK: !6 = !{!"branch_weights", i32 9, i32 7} ; CHECK: !6 = !{!"branch_weights", i32 17, i32 15}
; CHECK: !7 = !{!"branch_weights", i32 17, i32 9, i32 8, i32 7, i32 17} ; CHECK: !7 = !{!"branch_weights", i32 9, i32 7}
; CHECK: !8 = !{!"branch_weights", i32 24, i32 33} ; CHECK: !8 = !{!"branch_weights", i32 17, i32 9, i32 8, i32 7, i32 17}
; CHECK: !9 = !{!"branch_weights", i32 8, i32 33} ; CHECK: !9 = !{!"branch_weights", i32 24, i32 33}
; CHECK: !10 = !{!"branch_weights", i32 8, i32 33}
;; The false weight prints out as a negative integer here, but inside llvm, we ;; The false weight prints out as a negative integer here, but inside llvm, we
;; treat the weight as an unsigned integer. ;; treat the weight as an unsigned integer.
; CHECK: !10 = !{!"branch_weights", i32 112017436, i32 -735157296} ; CHECK: !11 = !{!"branch_weights", i32 112017436, i32 -735157296}
; CHECK: !11 = !{!"branch_weights", i32 3, i32 5} ; CHECK: !12 = !{!"branch_weights", i32 3, i32 5}
; CHECK: !12 = !{!"branch_weights", i32 22, i32 12} ; CHECK: !13 = !{!"branch_weights", i32 22, i32 12}
; CHECK: !13 = !{!"branch_weights", i32 34, i32 21} ; CHECK: !14 = !{!"branch_weights", i32 34, i32 21}
; CHECK: !14 = !{!"branch_weights", i32 33, i32 14} ; CHECK: !15 = !{!"branch_weights", i32 33, i32 14}
; CHECK: !15 = !{!"branch_weights", i32 47, i32 8} ; CHECK: !16 = !{!"branch_weights", i32 47, i32 8}
; CHECK: !16 = !{!"branch_weights", i32 8, i32 2} ; CHECK: !17 = !{!"branch_weights", i32 6, i32 2}
; CHECK: !18 = !{!"branch_weights", i32 8, i32 2}