forked from OSchip/llvm-project
[SimplifyCFG] Fix a cost modeling oversight in branch commoning
The cost modeling was not accounting for the fact we were duplicating the instruction once per predecessor. With a default threshold of 1, this meant we were actually creating #pred copies. Adding to the fun, there is *absolutely no* test coverage for this. Simply bailing for more than one predecessor passes all checked in tests. llvm-svn: 341001
This commit is contained in:
parent
32a8a2028c
commit
6bd16b5850
|
@ -2539,6 +2539,8 @@ static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI,
|
|||
bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
|
||||
BasicBlock *BB = BI->getParent();
|
||||
|
||||
const unsigned PredCount = pred_size(BB);
|
||||
|
||||
Instruction *Cond = nullptr;
|
||||
if (BI->isConditional())
|
||||
Cond = dyn_cast<Instruction>(BI->getCondition());
|
||||
|
@ -2588,7 +2590,8 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
|
|||
// too many instructions and these involved instructions can be executed
|
||||
// unconditionally. We denote all involved instructions except the condition
|
||||
// as "bonus instructions", and only allow this transformation when the
|
||||
// number of the bonus instructions does not exceed a certain threshold.
|
||||
// number of the bonus instructions we'll need to create when cloning into
|
||||
// each predecessor does not exceed a certain threshold.
|
||||
unsigned NumBonusInsts = 0;
|
||||
for (auto I = BB->begin(); Cond != &*I; ++I) {
|
||||
// Ignore dbg intrinsics.
|
||||
|
@ -2603,7 +2606,10 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
|
|||
// I is used in the same BB. Since BI uses Cond and doesn't have more slots
|
||||
// to use any other instruction, User must be an instruction between next(I)
|
||||
// and Cond.
|
||||
++NumBonusInsts;
|
||||
|
||||
// Account for the cost of duplicating this instruction into each
|
||||
// predecessor.
|
||||
NumBonusInsts += PredCount;
|
||||
// Early exits once we reach the limit.
|
||||
if (NumBonusInsts > BonusInstThreshold)
|
||||
return false;
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
; RUN: opt %s -simplifycfg -S | FileCheck %s --check-prefix=NORMAL
|
||||
; RUN: opt %s -simplifycfg -S -bonus-inst-threshold=2 | FileCheck %s --check-prefix=AGGRESSIVE
|
||||
; RUN: opt %s -simplifycfg -S -bonus-inst-threshold=4 | FileCheck %s --check-prefix=WAYAGGRESSIVE
|
||||
|
||||
define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input) {
|
||||
; NORMAL-LABEL: @foo(
|
||||
|
@ -26,3 +27,54 @@ cond.end:
|
|||
%cond = phi i32 [ %0, %cond.false ], [ 0, %lor.lhs.false ], [ 0, %entry ]
|
||||
ret i32 %cond
|
||||
}
|
||||
|
||||
declare void @distinct_a();
|
||||
declare void @distinct_b();
|
||||
|
||||
;; Like foo, but have to duplicate into multiple predecessors
|
||||
define i32 @bar(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input) {
|
||||
; NORMAL-LABEL: @bar(
|
||||
; AGGRESSIVE-LABEL: @bar(
|
||||
entry:
|
||||
%cmp_split = icmp slt i32 %d, %b
|
||||
%cmp = icmp sgt i32 %d, 3
|
||||
br i1 %cmp_split, label %pred_a, label %pred_b
|
||||
pred_a:
|
||||
; NORMAL-LABEL: pred_a:
|
||||
; AGGRESSIVE-LABEL: pred_a:
|
||||
; WAYAGGRESSIVE-LABEL: pred_a:
|
||||
; NORMAL: br i1
|
||||
; AGGRESSIVE: br i1
|
||||
; WAYAGGRESSIVE: br i1
|
||||
call void @distinct_a();
|
||||
br i1 %cmp, label %cond.end, label %lor.lhs.false
|
||||
pred_b:
|
||||
; NORMAL-LABEL: pred_b:
|
||||
; AGGRESSIVE-LABEL: pred_b:
|
||||
; WAYAGGRESSIVE-LABEL: pred_b:
|
||||
; NORMAL: br i1
|
||||
; AGGRESSIVE: br i1
|
||||
; WAYAGGRESSIVE: br i1
|
||||
call void @distinct_b();
|
||||
br i1 %cmp, label %cond.end, label %lor.lhs.false
|
||||
|
||||
lor.lhs.false:
|
||||
%mul = shl i32 %c, 1
|
||||
%add = add nsw i32 %mul, %a
|
||||
%cmp1 = icmp slt i32 %add, %b
|
||||
br i1 %cmp1, label %cond.false, label %cond.end
|
||||
; NORMAL-LABEL: lor.lhs.false:
|
||||
; AGGRESIVE-LABEL: lor.lhs.false:
|
||||
; WAYAGGRESIVE-LABEL: lor.lhs.false:
|
||||
; NORMAL: br i1
|
||||
; AGGRESSIVE: br i1
|
||||
; WAYAGGRESSIVE-NOT: br i1
|
||||
|
||||
cond.false:
|
||||
%0 = load i32, i32* %input, align 4
|
||||
br label %cond.end
|
||||
|
||||
cond.end:
|
||||
%cond = phi i32 [ %0, %cond.false ], [ 0, %lor.lhs.false ],[ 0, %pred_a ],[ 0, %pred_b ]
|
||||
ret i32 %cond
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue