forked from OSchip/llvm-project
[SimplifyCFG] Consider cost of combining predicates.
Modify FoldBranchToCommonDest to consider the cost of inserting instructions when attempting to combine predicates to fold blocks. The threshold can be controlled via a new option: -simplifycfg-branch-fold-threshold which defaults to '2' to allow the insertion of a not and another logical operator. Differential Revision: https://reviews.llvm.org/D86526
This commit is contained in:
parent
713c2ad60c
commit
65f78e73ad
|
@ -199,6 +199,7 @@ bool FlattenCFG(BasicBlock *BB, AAResults *AA = nullptr);
|
|||
/// branches to us and one of our successors, fold the setcc into the
|
||||
/// predecessor and use logical operations to pick the right destination.
|
||||
bool FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU = nullptr,
|
||||
const TargetTransformInfo *TTI = nullptr,
|
||||
unsigned BonusInstThreshold = 1);
|
||||
|
||||
/// This function takes a virtual register computed by an Instruction and
|
||||
|
|
|
@ -143,6 +143,13 @@ MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10),
|
|||
cl::desc("Max size of a block which is still considered "
|
||||
"small enough to thread through"));
|
||||
|
||||
// Two is chosen to allow one negation and a logical combine.
|
||||
static cl::opt<unsigned>
|
||||
BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
|
||||
cl::init(2),
|
||||
cl::desc("Maximum cost of combining conditions when "
|
||||
"folding branches"));
|
||||
|
||||
STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
|
||||
STATISTIC(NumLinearMaps,
|
||||
"Number of switch instructions turned into linear mapping");
|
||||
|
@ -2684,12 +2691,16 @@ static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI,
|
|||
/// and one of our successors, fold the block into the predecessor and use
|
||||
/// logical operations to pick the right destination.
|
||||
bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU,
|
||||
const TargetTransformInfo *TTI,
|
||||
unsigned BonusInstThreshold) {
|
||||
BasicBlock *BB = BI->getParent();
|
||||
|
||||
const unsigned PredCount = pred_size(BB);
|
||||
|
||||
bool Changed = false;
|
||||
TargetTransformInfo::TargetCostKind CostKind =
|
||||
BB->getParent()->hasMinSize() ? TargetTransformInfo::TCK_CodeSize
|
||||
: TargetTransformInfo::TCK_SizeAndLatency;
|
||||
|
||||
Instruction *Cond = nullptr;
|
||||
if (BI->isConditional())
|
||||
|
@ -2818,6 +2829,19 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU,
|
|||
continue;
|
||||
}
|
||||
|
||||
// Check the cost of inserting the necessary logic before performing the
|
||||
// transformation.
|
||||
if (TTI && Opc != Instruction::BinaryOpsEnd) {
|
||||
Type *Ty = BI->getCondition()->getType();
|
||||
unsigned Cost = TTI->getArithmeticInstrCost(Opc, Ty, CostKind);
|
||||
if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
|
||||
!isa<CmpInst>(PBI->getCondition())))
|
||||
Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);
|
||||
|
||||
if (Cost > BranchFoldThreshold)
|
||||
continue;
|
||||
}
|
||||
|
||||
LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
|
||||
Changed = true;
|
||||
|
||||
|
@ -6013,7 +6037,7 @@ bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
|
|||
// branches to us and our successor, fold the comparison into the
|
||||
// predecessor and use logical operations to update the incoming value
|
||||
// for PHI nodes in common successor.
|
||||
if (FoldBranchToCommonDest(BI, nullptr, Options.BonusInstThreshold))
|
||||
if (FoldBranchToCommonDest(BI, nullptr, &TTI, Options.BonusInstThreshold))
|
||||
return requestResimplify();
|
||||
return false;
|
||||
}
|
||||
|
@ -6076,7 +6100,7 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
|
|||
// If this basic block is ONLY a compare and a branch, and if a predecessor
|
||||
// branches to us and one of our successors, fold the comparison into the
|
||||
// predecessor and use logical operations to pick the right destination.
|
||||
if (FoldBranchToCommonDest(BI, nullptr, Options.BonusInstThreshold))
|
||||
if (FoldBranchToCommonDest(BI, nullptr, &TTI, Options.BonusInstThreshold))
|
||||
return requestResimplify();
|
||||
|
||||
// We have a conditional branch to two blocks that are only reachable
|
||||
|
|
|
@ -169,19 +169,34 @@ cond.end:
|
|||
}
|
||||
|
||||
define i32 @or_predicate_minsize(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input) #0 {
|
||||
; CHECK-LABEL: @or_predicate_minsize(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[D:%.*]], 3
|
||||
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
|
||||
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
|
||||
; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]]
|
||||
; CHECK-NEXT: br i1 [[OR_COND]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]]
|
||||
; CHECK: cond.false:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
|
||||
; CHECK-NEXT: br label [[COND_END]]
|
||||
; CHECK: cond.end:
|
||||
; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: ret i32 [[COND]]
|
||||
; THUMB-LABEL: @or_predicate_minsize(
|
||||
; THUMB-NEXT: entry:
|
||||
; THUMB-NEXT: [[CMP:%.*]] = icmp sgt i32 [[D:%.*]], 3
|
||||
; THUMB-NEXT: br i1 [[CMP]], label [[COND_END:%.*]], label [[LOR_LHS_FALSE:%.*]]
|
||||
; THUMB: lor.lhs.false:
|
||||
; THUMB-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
|
||||
; THUMB-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
|
||||
; THUMB-NEXT: br i1 [[CMP1]], label [[COND_END]], label [[COND_FALSE:%.*]]
|
||||
; THUMB: cond.false:
|
||||
; THUMB-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
|
||||
; THUMB-NEXT: br label [[COND_END]]
|
||||
; THUMB: cond.end:
|
||||
; THUMB-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[LOR_LHS_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; THUMB-NEXT: ret i32 [[COND]]
|
||||
;
|
||||
; ARM-LABEL: @or_predicate_minsize(
|
||||
; ARM-NEXT: entry:
|
||||
; ARM-NEXT: [[CMP:%.*]] = icmp sgt i32 [[D:%.*]], 3
|
||||
; ARM-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
|
||||
; ARM-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
|
||||
; ARM-NEXT: [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]]
|
||||
; ARM-NEXT: br i1 [[OR_COND]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]]
|
||||
; ARM: cond.false:
|
||||
; ARM-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
|
||||
; ARM-NEXT: br label [[COND_END]]
|
||||
; ARM: cond.end:
|
||||
; ARM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; ARM-NEXT: ret i32 [[COND]]
|
||||
;
|
||||
entry:
|
||||
%cmp = icmp sgt i32 %d, 3
|
||||
|
@ -202,19 +217,34 @@ cond.end:
|
|||
}
|
||||
|
||||
define i32 @or_invert_predicate_minsize(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input) #0 {
|
||||
; CHECK-LABEL: @or_invert_predicate_minsize(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 [[D:%.*]], 3
|
||||
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
|
||||
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
|
||||
; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]]
|
||||
; CHECK-NEXT: br i1 [[OR_COND]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]]
|
||||
; CHECK: cond.false:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
|
||||
; CHECK-NEXT: br label [[COND_END]]
|
||||
; CHECK: cond.end:
|
||||
; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: ret i32 [[COND]]
|
||||
; THUMB-LABEL: @or_invert_predicate_minsize(
|
||||
; THUMB-NEXT: entry:
|
||||
; THUMB-NEXT: [[CMP:%.*]] = icmp sgt i32 [[D:%.*]], 3
|
||||
; THUMB-NEXT: br i1 [[CMP]], label [[LOR_LHS_FALSE:%.*]], label [[COND_END:%.*]]
|
||||
; THUMB: lor.lhs.false:
|
||||
; THUMB-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
|
||||
; THUMB-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
|
||||
; THUMB-NEXT: br i1 [[CMP1]], label [[COND_END]], label [[COND_FALSE:%.*]]
|
||||
; THUMB: cond.false:
|
||||
; THUMB-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
|
||||
; THUMB-NEXT: br label [[COND_END]]
|
||||
; THUMB: cond.end:
|
||||
; THUMB-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[LOR_LHS_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; THUMB-NEXT: ret i32 [[COND]]
|
||||
;
|
||||
; ARM-LABEL: @or_invert_predicate_minsize(
|
||||
; ARM-NEXT: entry:
|
||||
; ARM-NEXT: [[CMP:%.*]] = icmp sle i32 [[D:%.*]], 3
|
||||
; ARM-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
|
||||
; ARM-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
|
||||
; ARM-NEXT: [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]]
|
||||
; ARM-NEXT: br i1 [[OR_COND]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]]
|
||||
; ARM: cond.false:
|
||||
; ARM-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
|
||||
; ARM-NEXT: br label [[COND_END]]
|
||||
; ARM: cond.end:
|
||||
; ARM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; ARM-NEXT: ret i32 [[COND]]
|
||||
;
|
||||
entry:
|
||||
%cmp = icmp sgt i32 %d, 3
|
||||
|
@ -267,19 +297,33 @@ cond.end:
|
|||
}
|
||||
|
||||
define i32 @or_xor_predicate_minsize(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input, i1 %cmp) #0 {
|
||||
; CHECK-LABEL: @or_xor_predicate_minsize(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[CMP_NOT:%.*]] = xor i1 [[CMP:%.*]], true
|
||||
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
|
||||
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
|
||||
; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[CMP_NOT]], [[CMP1]]
|
||||
; CHECK-NEXT: br i1 [[OR_COND]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]]
|
||||
; CHECK: cond.false:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
|
||||
; CHECK-NEXT: br label [[COND_END]]
|
||||
; CHECK: cond.end:
|
||||
; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: ret i32 [[COND]]
|
||||
; THUMB-LABEL: @or_xor_predicate_minsize(
|
||||
; THUMB-NEXT: entry:
|
||||
; THUMB-NEXT: br i1 [[CMP:%.*]], label [[LOR_LHS_FALSE:%.*]], label [[COND_END:%.*]]
|
||||
; THUMB: lor.lhs.false:
|
||||
; THUMB-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
|
||||
; THUMB-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
|
||||
; THUMB-NEXT: br i1 [[CMP1]], label [[COND_END]], label [[COND_FALSE:%.*]]
|
||||
; THUMB: cond.false:
|
||||
; THUMB-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
|
||||
; THUMB-NEXT: br label [[COND_END]]
|
||||
; THUMB: cond.end:
|
||||
; THUMB-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[LOR_LHS_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; THUMB-NEXT: ret i32 [[COND]]
|
||||
;
|
||||
; ARM-LABEL: @or_xor_predicate_minsize(
|
||||
; ARM-NEXT: entry:
|
||||
; ARM-NEXT: [[CMP_NOT:%.*]] = xor i1 [[CMP:%.*]], true
|
||||
; ARM-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
|
||||
; ARM-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
|
||||
; ARM-NEXT: [[OR_COND:%.*]] = or i1 [[CMP_NOT]], [[CMP1]]
|
||||
; ARM-NEXT: br i1 [[OR_COND]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]]
|
||||
; ARM: cond.false:
|
||||
; ARM-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
|
||||
; ARM-NEXT: br label [[COND_END]]
|
||||
; ARM: cond.end:
|
||||
; ARM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; ARM-NEXT: ret i32 [[COND]]
|
||||
;
|
||||
entry:
|
||||
br i1 %cmp, label %lor.lhs.false, label %cond.end
|
||||
|
@ -331,19 +375,33 @@ cond.end:
|
|||
}
|
||||
|
||||
define i32 @and_xor_minsize(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input, i1 %cmp) #0 {
|
||||
; CHECK-LABEL: @and_xor_minsize(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[CMP_NOT:%.*]] = xor i1 [[CMP:%.*]], true
|
||||
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
|
||||
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
|
||||
; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[CMP_NOT]], [[CMP1]]
|
||||
; CHECK-NEXT: br i1 [[OR_COND]], label [[COND_FALSE:%.*]], label [[COND_END:%.*]]
|
||||
; CHECK: cond.false:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
|
||||
; CHECK-NEXT: br label [[COND_END]]
|
||||
; CHECK: cond.end:
|
||||
; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: ret i32 [[COND]]
|
||||
; THUMB-LABEL: @and_xor_minsize(
|
||||
; THUMB-NEXT: entry:
|
||||
; THUMB-NEXT: br i1 [[CMP:%.*]], label [[COND_END:%.*]], label [[LOR_LHS_FALSE:%.*]]
|
||||
; THUMB: lor.lhs.false:
|
||||
; THUMB-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
|
||||
; THUMB-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
|
||||
; THUMB-NEXT: br i1 [[CMP1]], label [[COND_FALSE:%.*]], label [[COND_END]]
|
||||
; THUMB: cond.false:
|
||||
; THUMB-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
|
||||
; THUMB-NEXT: br label [[COND_END]]
|
||||
; THUMB: cond.end:
|
||||
; THUMB-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[LOR_LHS_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; THUMB-NEXT: ret i32 [[COND]]
|
||||
;
|
||||
; ARM-LABEL: @and_xor_minsize(
|
||||
; ARM-NEXT: entry:
|
||||
; ARM-NEXT: [[CMP_NOT:%.*]] = xor i1 [[CMP:%.*]], true
|
||||
; ARM-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
|
||||
; ARM-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
|
||||
; ARM-NEXT: [[OR_COND:%.*]] = and i1 [[CMP_NOT]], [[CMP1]]
|
||||
; ARM-NEXT: br i1 [[OR_COND]], label [[COND_FALSE:%.*]], label [[COND_END:%.*]]
|
||||
; ARM: cond.false:
|
||||
; ARM-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
|
||||
; ARM-NEXT: br label [[COND_END]]
|
||||
; ARM: cond.end:
|
||||
; ARM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; ARM-NEXT: ret i32 [[COND]]
|
||||
;
|
||||
entry:
|
||||
br i1 %cmp, label %cond.end, label %lor.lhs.false
|
||||
|
|
Loading…
Reference in New Issue