[SimplifyCFG] Consider cost of combining predicates.

Modify FoldBranchToCommonDest to consider the cost of the instructions
it inserts when combining predicates in order to fold blocks.
The threshold can be controlled via a new option,
-simplifycfg-branch-fold-threshold, which defaults to 2 so that
a 'not' and one other logical operator can be inserted.

Differential Revision: https://reviews.llvm.org/D86526
This commit is contained in:
Sam Parker 2020-09-07 09:08:07 +01:00
parent 713c2ad60c
commit 65f78e73ad
3 changed files with 137 additions and 54 deletions

View File

@ -199,6 +199,7 @@ bool FlattenCFG(BasicBlock *BB, AAResults *AA = nullptr);
/// branches to us and one of our successors, fold the setcc into the /// branches to us and one of our successors, fold the setcc into the
/// predecessor and use logical operations to pick the right destination. /// predecessor and use logical operations to pick the right destination.
bool FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU = nullptr, bool FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU = nullptr,
const TargetTransformInfo *TTI = nullptr,
unsigned BonusInstThreshold = 1); unsigned BonusInstThreshold = 1);
/// This function takes a virtual register computed by an Instruction and /// This function takes a virtual register computed by an Instruction and

View File

@ -143,6 +143,13 @@ MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10),
cl::desc("Max size of a block which is still considered " cl::desc("Max size of a block which is still considered "
"small enough to thread through")); "small enough to thread through"));
// Upper bound on the cost of the extra logic FoldBranchToCommonDest may insert
// to combine two branch conditions; a candidate fold whose cost exceeds this
// value is not performed.
// Two is chosen to allow one negation and a logical combine.
static cl::opt<unsigned>
BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
cl::init(2),
cl::desc("Maximum cost of combining conditions when "
"folding branches"));
STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
STATISTIC(NumLinearMaps, STATISTIC(NumLinearMaps,
"Number of switch instructions turned into linear mapping"); "Number of switch instructions turned into linear mapping");
@ -2684,12 +2691,16 @@ static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI,
/// and one of our successors, fold the block into the predecessor and use /// and one of our successors, fold the block into the predecessor and use
/// logical operations to pick the right destination. /// logical operations to pick the right destination.
bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU, bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU,
const TargetTransformInfo *TTI,
unsigned BonusInstThreshold) { unsigned BonusInstThreshold) {
BasicBlock *BB = BI->getParent(); BasicBlock *BB = BI->getParent();
const unsigned PredCount = pred_size(BB); const unsigned PredCount = pred_size(BB);
bool Changed = false; bool Changed = false;
TargetTransformInfo::TargetCostKind CostKind =
BB->getParent()->hasMinSize() ? TargetTransformInfo::TCK_CodeSize
: TargetTransformInfo::TCK_SizeAndLatency;
Instruction *Cond = nullptr; Instruction *Cond = nullptr;
if (BI->isConditional()) if (BI->isConditional())
@ -2818,6 +2829,19 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU,
continue; continue;
} }
// Check the cost of inserting the necessary logic before performing the
// transformation.
if (TTI && Opc != Instruction::BinaryOpsEnd) {
Type *Ty = BI->getCondition()->getType();
unsigned Cost = TTI->getArithmeticInstrCost(Opc, Ty, CostKind);
if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
!isa<CmpInst>(PBI->getCondition())))
Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);
if (Cost > BranchFoldThreshold)
continue;
}
LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
Changed = true; Changed = true;
@ -6013,7 +6037,7 @@ bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
// branches to us and our successor, fold the comparison into the // branches to us and our successor, fold the comparison into the
// predecessor and use logical operations to update the incoming value // predecessor and use logical operations to update the incoming value
// for PHI nodes in common successor. // for PHI nodes in common successor.
if (FoldBranchToCommonDest(BI, nullptr, Options.BonusInstThreshold)) if (FoldBranchToCommonDest(BI, nullptr, &TTI, Options.BonusInstThreshold))
return requestResimplify(); return requestResimplify();
return false; return false;
} }
@ -6076,7 +6100,7 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// If this basic block is ONLY a compare and a branch, and if a predecessor // If this basic block is ONLY a compare and a branch, and if a predecessor
// branches to us and one of our successors, fold the comparison into the // branches to us and one of our successors, fold the comparison into the
// predecessor and use logical operations to pick the right destination. // predecessor and use logical operations to pick the right destination.
if (FoldBranchToCommonDest(BI, nullptr, Options.BonusInstThreshold)) if (FoldBranchToCommonDest(BI, nullptr, &TTI, Options.BonusInstThreshold))
return requestResimplify(); return requestResimplify();
// We have a conditional branch to two blocks that are only reachable // We have a conditional branch to two blocks that are only reachable

View File

@ -169,19 +169,34 @@ cond.end:
} }
define i32 @or_predicate_minsize(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input) #0 { define i32 @or_predicate_minsize(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input) #0 {
; CHECK-LABEL: @or_predicate_minsize( ; THUMB-LABEL: @or_predicate_minsize(
; CHECK-NEXT: entry: ; THUMB-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[D:%.*]], 3 ; THUMB-NEXT: [[CMP:%.*]] = icmp sgt i32 [[D:%.*]], 3
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]] ; THUMB-NEXT: br i1 [[CMP]], label [[COND_END:%.*]], label [[LOR_LHS_FALSE:%.*]]
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]] ; THUMB: lor.lhs.false:
; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]] ; THUMB-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
; CHECK-NEXT: br i1 [[OR_COND]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]] ; THUMB-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
; CHECK: cond.false: ; THUMB-NEXT: br i1 [[CMP1]], label [[COND_END]], label [[COND_FALSE:%.*]]
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4 ; THUMB: cond.false:
; CHECK-NEXT: br label [[COND_END]] ; THUMB-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
; CHECK: cond.end: ; THUMB-NEXT: br label [[COND_END]]
; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ] ; THUMB: cond.end:
; CHECK-NEXT: ret i32 [[COND]] ; THUMB-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[LOR_LHS_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
; THUMB-NEXT: ret i32 [[COND]]
;
; ARM-LABEL: @or_predicate_minsize(
; ARM-NEXT: entry:
; ARM-NEXT: [[CMP:%.*]] = icmp sgt i32 [[D:%.*]], 3
; ARM-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
; ARM-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
; ARM-NEXT: [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]]
; ARM-NEXT: br i1 [[OR_COND]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]]
; ARM: cond.false:
; ARM-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
; ARM-NEXT: br label [[COND_END]]
; ARM: cond.end:
; ARM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
; ARM-NEXT: ret i32 [[COND]]
; ;
entry: entry:
%cmp = icmp sgt i32 %d, 3 %cmp = icmp sgt i32 %d, 3
@ -202,19 +217,34 @@ cond.end:
} }
define i32 @or_invert_predicate_minsize(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input) #0 { define i32 @or_invert_predicate_minsize(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input) #0 {
; CHECK-LABEL: @or_invert_predicate_minsize( ; THUMB-LABEL: @or_invert_predicate_minsize(
; CHECK-NEXT: entry: ; THUMB-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 [[D:%.*]], 3 ; THUMB-NEXT: [[CMP:%.*]] = icmp sgt i32 [[D:%.*]], 3
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]] ; THUMB-NEXT: br i1 [[CMP]], label [[LOR_LHS_FALSE:%.*]], label [[COND_END:%.*]]
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]] ; THUMB: lor.lhs.false:
; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]] ; THUMB-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
; CHECK-NEXT: br i1 [[OR_COND]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]] ; THUMB-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
; CHECK: cond.false: ; THUMB-NEXT: br i1 [[CMP1]], label [[COND_END]], label [[COND_FALSE:%.*]]
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4 ; THUMB: cond.false:
; CHECK-NEXT: br label [[COND_END]] ; THUMB-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
; CHECK: cond.end: ; THUMB-NEXT: br label [[COND_END]]
; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ] ; THUMB: cond.end:
; CHECK-NEXT: ret i32 [[COND]] ; THUMB-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[LOR_LHS_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
; THUMB-NEXT: ret i32 [[COND]]
;
; ARM-LABEL: @or_invert_predicate_minsize(
; ARM-NEXT: entry:
; ARM-NEXT: [[CMP:%.*]] = icmp sle i32 [[D:%.*]], 3
; ARM-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
; ARM-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
; ARM-NEXT: [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]]
; ARM-NEXT: br i1 [[OR_COND]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]]
; ARM: cond.false:
; ARM-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
; ARM-NEXT: br label [[COND_END]]
; ARM: cond.end:
; ARM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
; ARM-NEXT: ret i32 [[COND]]
; ;
entry: entry:
%cmp = icmp sgt i32 %d, 3 %cmp = icmp sgt i32 %d, 3
@ -267,19 +297,33 @@ cond.end:
} }
define i32 @or_xor_predicate_minsize(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input, i1 %cmp) #0 { define i32 @or_xor_predicate_minsize(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input, i1 %cmp) #0 {
; CHECK-LABEL: @or_xor_predicate_minsize( ; THUMB-LABEL: @or_xor_predicate_minsize(
; CHECK-NEXT: entry: ; THUMB-NEXT: entry:
; CHECK-NEXT: [[CMP_NOT:%.*]] = xor i1 [[CMP:%.*]], true ; THUMB-NEXT: br i1 [[CMP:%.*]], label [[LOR_LHS_FALSE:%.*]], label [[COND_END:%.*]]
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]] ; THUMB: lor.lhs.false:
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]] ; THUMB-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[CMP_NOT]], [[CMP1]] ; THUMB-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
; CHECK-NEXT: br i1 [[OR_COND]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]] ; THUMB-NEXT: br i1 [[CMP1]], label [[COND_END]], label [[COND_FALSE:%.*]]
; CHECK: cond.false: ; THUMB: cond.false:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4 ; THUMB-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
; CHECK-NEXT: br label [[COND_END]] ; THUMB-NEXT: br label [[COND_END]]
; CHECK: cond.end: ; THUMB: cond.end:
; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ] ; THUMB-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[LOR_LHS_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: ret i32 [[COND]] ; THUMB-NEXT: ret i32 [[COND]]
;
; ARM-LABEL: @or_xor_predicate_minsize(
; ARM-NEXT: entry:
; ARM-NEXT: [[CMP_NOT:%.*]] = xor i1 [[CMP:%.*]], true
; ARM-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
; ARM-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
; ARM-NEXT: [[OR_COND:%.*]] = or i1 [[CMP_NOT]], [[CMP1]]
; ARM-NEXT: br i1 [[OR_COND]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]]
; ARM: cond.false:
; ARM-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
; ARM-NEXT: br label [[COND_END]]
; ARM: cond.end:
; ARM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
; ARM-NEXT: ret i32 [[COND]]
; ;
entry: entry:
br i1 %cmp, label %lor.lhs.false, label %cond.end br i1 %cmp, label %lor.lhs.false, label %cond.end
@ -331,19 +375,33 @@ cond.end:
} }
define i32 @and_xor_minsize(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input, i1 %cmp) #0 { define i32 @and_xor_minsize(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input, i1 %cmp) #0 {
; CHECK-LABEL: @and_xor_minsize( ; THUMB-LABEL: @and_xor_minsize(
; CHECK-NEXT: entry: ; THUMB-NEXT: entry:
; CHECK-NEXT: [[CMP_NOT:%.*]] = xor i1 [[CMP:%.*]], true ; THUMB-NEXT: br i1 [[CMP:%.*]], label [[COND_END:%.*]], label [[LOR_LHS_FALSE:%.*]]
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]] ; THUMB: lor.lhs.false:
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]] ; THUMB-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[CMP_NOT]], [[CMP1]] ; THUMB-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
; CHECK-NEXT: br i1 [[OR_COND]], label [[COND_FALSE:%.*]], label [[COND_END:%.*]] ; THUMB-NEXT: br i1 [[CMP1]], label [[COND_FALSE:%.*]], label [[COND_END]]
; CHECK: cond.false: ; THUMB: cond.false:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4 ; THUMB-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
; CHECK-NEXT: br label [[COND_END]] ; THUMB-NEXT: br label [[COND_END]]
; CHECK: cond.end: ; THUMB: cond.end:
; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ] ; THUMB-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[LOR_LHS_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: ret i32 [[COND]] ; THUMB-NEXT: ret i32 [[COND]]
;
; ARM-LABEL: @and_xor_minsize(
; ARM-NEXT: entry:
; ARM-NEXT: [[CMP_NOT:%.*]] = xor i1 [[CMP:%.*]], true
; ARM-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
; ARM-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
; ARM-NEXT: [[OR_COND:%.*]] = and i1 [[CMP_NOT]], [[CMP1]]
; ARM-NEXT: br i1 [[OR_COND]], label [[COND_FALSE:%.*]], label [[COND_END:%.*]]
; ARM: cond.false:
; ARM-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
; ARM-NEXT: br label [[COND_END]]
; ARM: cond.end:
; ARM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
; ARM-NEXT: ret i32 [[COND]]
; ;
entry: entry:
br i1 %cmp, label %cond.end, label %lor.lhs.false br i1 %cmp, label %cond.end, label %lor.lhs.false