forked from OSchip/llvm-project
[SimlifyCFG] Prevent passes from destroying canonical loop structure, especially for nested loops
When eliminating or merging almost empty basic blocks, the existence of non-trivial PHI nodes is currently used to recognize potential loops of which the block is the header and keep the block. However, the current algorithm fails if the loops' exit condition is evaluated only with volatile values hence no PHI nodes in the header. Especially when such a loop is an outer loop of a nested loop, the loop is collapsed into a single loop which prevent later optimizations from being applied (e.g., transforming nested loops into simplified forms and loop vectorization). The patch augments the existing PHI node-based check by adding a pre-test if the BB actually belongs to a set of loop headers and not eliminating it if yes. llvm-svn: 264697
This commit is contained in:
parent
f54530ef00
commit
4673f10568
|
@ -21,6 +21,7 @@
|
|||
#include "llvm/IR/GetElementPtrTypeIterator.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/IR/Operator.h"
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
|
@ -124,13 +125,16 @@ bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB);
|
|||
/// values, but instcombine orders them so it usually won't matter.
|
||||
bool EliminateDuplicatePHINodes(BasicBlock *BB);
|
||||
|
||||
/// This function is used to do simplification of a CFG. For example, it
|
||||
/// adjusts branches to branches to eliminate the extra hop, it eliminates
|
||||
/// unreachable basic blocks, and does other "peephole" optimization of the CFG.
|
||||
/// It returns true if a modification was made, possibly deleting the basic
|
||||
/// block that was pointed to.
|
||||
/// This function is used to do simplification of a CFG. For
|
||||
/// example, it adjusts branches to branches to eliminate the extra hop, it
|
||||
/// eliminates unreachable basic blocks, and does other "peephole" optimization
|
||||
/// of the CFG. It returns true if a modification was made, possibly deleting
|
||||
/// the basic block that was pointed to. LoopHeaders is an optional input
|
||||
/// parameter, providing the set of loop header that SimplifyCFG should not
|
||||
/// eliminate.
|
||||
bool SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
|
||||
unsigned BonusInstThreshold, AssumptionCache *AC = nullptr);
|
||||
unsigned BonusInstThreshold, AssumptionCache *AC = nullptr,
|
||||
SmallPtrSetImpl<BasicBlock *> *LoopHeaders = nullptr);
|
||||
|
||||
/// This function is used to flatten a CFG. For example, it uses parallel-and
|
||||
/// and parallel-or mode to collapse if-conditions and merge if-regions with
|
||||
|
|
|
@ -245,10 +245,13 @@ bool JumpThreading::runOnFunction(Function &F) {
|
|||
// Can't thread an unconditional jump, but if the block is "almost
|
||||
// empty", we can replace uses of it with uses of the successor and make
|
||||
// this dead.
|
||||
// We should not eliminate the loop header either, because eliminating
|
||||
// a loop header might later prevent LoopSimplify from transforming nested
|
||||
// loops into simplified form.
|
||||
if (BI && BI->isUnconditional() &&
|
||||
BB != &BB->getParent()->getEntryBlock() &&
|
||||
// If the terminator is the only non-phi instruction, try to nuke it.
|
||||
BB->getFirstNonPHIOrDbg()->isTerminator()) {
|
||||
BB->getFirstNonPHIOrDbg()->isTerminator() && !LoopHeaders.count(BB)) {
|
||||
// Since TryToSimplifyUncondBranchFromEmptyBlock may delete the
|
||||
// block, we have to make sure it isn't in the LoopHeaders set. We
|
||||
// reinsert afterward if needed.
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
#include "llvm/Analysis/GlobalsModRef.h"
|
||||
#include "llvm/Analysis/AssumptionCache.h"
|
||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
#include "llvm/Analysis/CFG.h"
|
||||
#include "llvm/IR/Attributes.h"
|
||||
#include "llvm/IR/CFG.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
|
@ -131,12 +132,19 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
|
|||
unsigned BonusInstThreshold) {
|
||||
bool Changed = false;
|
||||
bool LocalChange = true;
|
||||
|
||||
SmallVector<std::pair<const BasicBlock *, const BasicBlock *>, 32> Edges;
|
||||
FindFunctionBackedges(F, Edges);
|
||||
SmallPtrSet<BasicBlock *, 16> LoopHeaders;
|
||||
for (unsigned i = 0, e = Edges.size(); i != e; ++i)
|
||||
LoopHeaders.insert(const_cast<BasicBlock *>(Edges[i].second));
|
||||
|
||||
while (LocalChange) {
|
||||
LocalChange = false;
|
||||
|
||||
// Loop over all of the basic blocks and remove them if they are unneeded.
|
||||
for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
|
||||
if (SimplifyCFG(&*BBIt++, TTI, BonusInstThreshold, AC)) {
|
||||
if (SimplifyCFG(&*BBIt++, TTI, BonusInstThreshold, AC, &LoopHeaders)) {
|
||||
LocalChange = true;
|
||||
++NumSimpl;
|
||||
}
|
||||
|
|
|
@ -135,6 +135,7 @@ class SimplifyCFGOpt {
|
|||
const DataLayout &DL;
|
||||
unsigned BonusInstThreshold;
|
||||
AssumptionCache *AC;
|
||||
SmallPtrSetImpl<BasicBlock *> *LoopHeaders;
|
||||
Value *isValueEqualityComparison(TerminatorInst *TI);
|
||||
BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI,
|
||||
std::vector<ValueEqualityComparisonCase> &Cases);
|
||||
|
@ -157,8 +158,10 @@ class SimplifyCFGOpt {
|
|||
|
||||
public:
|
||||
SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout &DL,
|
||||
unsigned BonusInstThreshold, AssumptionCache *AC)
|
||||
: TTI(TTI), DL(DL), BonusInstThreshold(BonusInstThreshold), AC(AC) {}
|
||||
unsigned BonusInstThreshold, AssumptionCache *AC,
|
||||
SmallPtrSetImpl<BasicBlock *> *LoopHeaders)
|
||||
: TTI(TTI), DL(DL), BonusInstThreshold(BonusInstThreshold), AC(AC),
|
||||
LoopHeaders(LoopHeaders) {}
|
||||
bool run(BasicBlock *BB);
|
||||
};
|
||||
}
|
||||
|
@ -3362,6 +3365,7 @@ bool SimplifyCFGOpt::SimplifySingleResume(ResumeInst *RI) {
|
|||
|
||||
// The landingpad is now unreachable. Zap it.
|
||||
BB->eraseFromParent();
|
||||
if (LoopHeaders) LoopHeaders->erase(BB);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -3560,9 +3564,11 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
|
|||
}
|
||||
|
||||
// If we eliminated all predecessors of the block, delete the block now.
|
||||
if (pred_empty(BB))
|
||||
if (pred_empty(BB)) {
|
||||
// We know there are no successors, so just nuke the block.
|
||||
BB->eraseFromParent();
|
||||
if (LoopHeaders) LoopHeaders->erase(BB);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -3719,6 +3725,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
|
|||
BB != &BB->getParent()->getEntryBlock()) {
|
||||
// We know there are no successors, so just nuke the block.
|
||||
BB->eraseFromParent();
|
||||
if (LoopHeaders) LoopHeaders->erase(BB);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -5062,8 +5069,14 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
|
|||
return true;
|
||||
|
||||
// If the Terminator is the only non-phi instruction, simplify the block.
|
||||
// if LoopHeader is provided, check if the block is a loop header
|
||||
// (This is for early invocations before loop simplify and vectorization
|
||||
// to keep canonical loop forms for nested loops.
|
||||
// These blocks can be eliminated when the pass is invoked later
|
||||
// in the back-end.)
|
||||
BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator();
|
||||
if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
|
||||
(!LoopHeaders || (LoopHeaders && !LoopHeaders->count(BB))) &&
|
||||
TryToSimplifyUncondBranchFromEmptyBlock(BB))
|
||||
return true;
|
||||
|
||||
|
@ -5343,7 +5356,8 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
|
|||
/// of the CFG. It returns true if a modification was made.
|
||||
///
|
||||
bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
|
||||
unsigned BonusInstThreshold, AssumptionCache *AC) {
|
||||
unsigned BonusInstThreshold, AssumptionCache *AC,
|
||||
SmallPtrSetImpl<BasicBlock *> *LoopHeaders) {
|
||||
return SimplifyCFGOpt(TTI, BB->getModule()->getDataLayout(),
|
||||
BonusInstThreshold, AC).run(BB);
|
||||
BonusInstThreshold, AC, LoopHeaders).run(BB);
|
||||
}
|
||||
|
|
|
@ -16,23 +16,23 @@ for.body: ; preds = %for.inc, %for.body.
|
|||
%cmp1 = icmp eq i32 %a, 12345
|
||||
br i1 %cmp1, label %if.then, label %if.else, !prof !0
|
||||
; CHECK: %cmp1 = icmp eq i32 %a, 12345
|
||||
; CHECK-NEXT: br i1 %cmp1, label %if.then.us, label %if.else, !prof !0
|
||||
; CHECK-NEXT: br i1 %cmp1, label %for.body.us, label %for.body, !prof !0
|
||||
if.then: ; preds = %for.body
|
||||
; CHECK: if.then.us:
|
||||
; CHECK: for.body.us:
|
||||
; CHECK: add nsw i32 %{{.*}}, 123
|
||||
; CHECK: %exitcond.us = icmp eq i32 %inc.us, %b
|
||||
; CHECK: br i1 %exitcond.us, label %for.cond.cleanup, label %if.then.us
|
||||
; CHECK: br i1 %exitcond.us, label %for.cond.cleanup, label %for.body.us
|
||||
%add = add nsw i32 %add.i, 123
|
||||
br label %for.inc
|
||||
|
||||
if.else: ; preds = %for.body
|
||||
%mul = mul nsw i32 %mul.i, %b
|
||||
br label %for.inc
|
||||
; CHECK: if.else:
|
||||
; CHECK: for.body:
|
||||
; CHECK: %mul = mul nsw i32 %mul.i, %b
|
||||
; CHECK: %inc = add nuw nsw i32 %inc.i, 1
|
||||
; CHECK: %exitcond = icmp eq i32 %inc, %b
|
||||
; CHECK: br i1 %exitcond, label %for.cond.cleanup, label %if.else
|
||||
; CHECK: br i1 %exitcond, label %for.cond.cleanup, label %for.body
|
||||
for.inc: ; preds = %if.then, %if.else
|
||||
%mul.p = phi i32 [ %b, %if.then ], [ %mul, %if.else ]
|
||||
%add.p = phi i32 [ %add, %if.then ], [ %a, %if.else ]
|
||||
|
|
|
@ -16,10 +16,10 @@
|
|||
; CHECK-NEXT: br i1 %a, label %entry.split, label %abort0.split
|
||||
|
||||
; CHECK: entry.split:
|
||||
; CHECK-NEXT: br i1 %b, label %cond.end, label %abort1.split
|
||||
; CHECK-NEXT: br i1 %b, label %for.body, label %abort1.split
|
||||
|
||||
; CHECK: cond.end:
|
||||
; CHECK-NEXT: br label %cond.end
|
||||
; CHECK: for.body:
|
||||
; CHECK-NEXT: br label %for.body
|
||||
|
||||
; CHECK: abort0.split:
|
||||
; CHECK-NEXT: call void @end0() [[NOR_NUW:#[0-9]+]]
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; RUN: opt < %s -simplifycfg -S > %t
|
||||
; RUN: not grep "^BB.tomerge" %t
|
||||
; RUN: grep "^BB.nomerge" %t | count 2
|
||||
; RUN: grep "^BB.nomerge" %t | count 4
|
||||
|
||||
; ModuleID = '<stdin>'
|
||||
declare i1 @foo()
|
||||
|
@ -54,24 +54,24 @@ Exit: ; preds = %Succ
|
|||
ret void
|
||||
}
|
||||
|
||||
; This function can be merged
|
||||
; This function can't be merged (for keeping canonical loop structures)
|
||||
define void @c() {
|
||||
entry:
|
||||
br label %BB.tomerge
|
||||
br label %BB.nomerge
|
||||
|
||||
BB.tomerge: ; preds = %Common, %entry
|
||||
BB.nomerge: ; preds = %Common, %entry
|
||||
br label %Succ
|
||||
|
||||
Succ: ; preds = %Common, %BB.tomerge, %Pre-Exit
|
||||
; This phi has identical values for Common and (through BB) Common,
|
||||
; blocks can't be merged
|
||||
%b = phi i32 [ 1, %BB.tomerge ], [ 1, %Common ], [ 2, %Pre-Exit ]
|
||||
%b = phi i32 [ 1, %BB.nomerge ], [ 1, %Common ], [ 2, %Pre-Exit ]
|
||||
%conde = call i1 @foo( ) ; <i1> [#uses=1]
|
||||
br i1 %conde, label %Common, label %Pre-Exit
|
||||
|
||||
Common: ; preds = %Succ
|
||||
%cond = call i1 @foo( ) ; <i1> [#uses=1]
|
||||
br i1 %cond, label %BB.tomerge, label %Succ
|
||||
br i1 %cond, label %BB.nomerge, label %Succ
|
||||
|
||||
Pre-Exit: ; preds = %Succ
|
||||
; This adds a backedge, so the %b phi node gets a third branch and is
|
||||
|
@ -83,25 +83,25 @@ Exit: ; preds = %Pre-Exit
|
|||
ret void
|
||||
}
|
||||
|
||||
; This function can be merged
|
||||
; This function can't be merged (for keeping canonical loop structures)
|
||||
define void @d() {
|
||||
entry:
|
||||
br label %BB.tomerge
|
||||
br label %BB.nomerge
|
||||
|
||||
BB.tomerge: ; preds = %Common, %entry
|
||||
BB.nomerge: ; preds = %Common, %entry
|
||||
; This phi has a matching value (0) with below phi (0), so blocks
|
||||
; can be merged.
|
||||
%a = phi i32 [ 1, %entry ], [ 0, %Common ] ; <i32> [#uses=1]
|
||||
br label %Succ
|
||||
|
||||
Succ: ; preds = %Common, %BB.tomerge
|
||||
%b = phi i32 [ %a, %BB.tomerge ], [ 0, %Common ] ; <i32> [#uses=0]
|
||||
%b = phi i32 [ %a, %BB.nomerge ], [ 0, %Common ] ; <i32> [#uses=0]
|
||||
%conde = call i1 @foo( ) ; <i1> [#uses=1]
|
||||
br i1 %conde, label %Common, label %Exit
|
||||
|
||||
Common: ; preds = %Succ
|
||||
%cond = call i1 @foo( ) ; <i1> [#uses=1]
|
||||
br i1 %cond, label %BB.tomerge, label %Succ
|
||||
br i1 %cond, label %BB.nomerge, label %Succ
|
||||
|
||||
Exit: ; preds = %Succ
|
||||
ret void
|
||||
|
@ -110,21 +110,21 @@ Exit: ; preds = %Succ
|
|||
; This function can be merged
|
||||
define void @e() {
|
||||
entry:
|
||||
br label %BB.tomerge
|
||||
br label %Succ
|
||||
|
||||
BB.tomerge: ; preds = %Use, %entry
|
||||
Succ: ; preds = %Use, %entry
|
||||
; This phi is used somewhere else than Succ, but this should not prevent
|
||||
; merging this block
|
||||
%a = phi i32 [ 1, %entry ], [ 0, %Use ] ; <i32> [#uses=1]
|
||||
br label %Succ
|
||||
br label %BB.tomerge
|
||||
|
||||
Succ: ; preds = %BB.tomerge
|
||||
BB.tomerge: ; preds = %BB.tomerge
|
||||
%conde = call i1 @foo( ) ; <i1> [#uses=1]
|
||||
br i1 %conde, label %Use, label %Exit
|
||||
|
||||
Use: ; preds = %Succ
|
||||
%cond = call i1 @bar( i32 %a ) ; <i1> [#uses=1]
|
||||
br i1 %cond, label %BB.tomerge, label %Exit
|
||||
br i1 %cond, label %Succ, label %Exit
|
||||
|
||||
Exit: ; preds = %Use, %Succ
|
||||
ret void
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
; RUN: not grep X: %t
|
||||
; RUN: not grep 'switch i32[^U]+%U' %t
|
||||
; RUN: not grep "^BB.tomerge" %t
|
||||
; RUN: grep "^BB.nomerge" %t | count 2
|
||||
; RUN: grep "^BB.nomerge" %t | count 4
|
||||
;
|
||||
|
||||
; ModuleID = '<stdin>'
|
||||
|
@ -179,24 +179,24 @@ Exit: ; preds = %Succ
|
|||
ret void
|
||||
}
|
||||
|
||||
; This function can be merged
|
||||
; This function can't be merged (for keeping canonical loop structures)
|
||||
define void @c() {
|
||||
entry:
|
||||
br label %BB.tomerge
|
||||
br label %BB.nomerge
|
||||
|
||||
BB.tomerge: ; preds = %Common, %entry
|
||||
BB.nomerge: ; preds = %Common, %entry
|
||||
br label %Succ
|
||||
|
||||
Succ: ; preds = %Common, %BB.tomerge, %Pre-Exit
|
||||
; This phi has identical values for Common and (through BB) Common,
|
||||
; blocks can't be merged
|
||||
%b = phi i32 [ 1, %BB.tomerge ], [ 1, %Common ], [ 2, %Pre-Exit ]
|
||||
%b = phi i32 [ 1, %BB.nomerge ], [ 1, %Common ], [ 2, %Pre-Exit ]
|
||||
%conde = call i1 @foo( ) ; <i1> [#uses=1]
|
||||
br i1 %conde, label %Common, label %Pre-Exit
|
||||
|
||||
Common: ; preds = %Succ
|
||||
%cond = call i1 @foo( ) ; <i1> [#uses=1]
|
||||
br i1 %cond, label %BB.tomerge, label %Succ
|
||||
br i1 %cond, label %BB.nomerge, label %Succ
|
||||
|
||||
Pre-Exit: ; preds = %Succ
|
||||
; This adds a backedge, so the %b phi node gets a third branch and is
|
||||
|
@ -208,25 +208,25 @@ Exit: ; preds = %Pre-Exit
|
|||
ret void
|
||||
}
|
||||
|
||||
; This function can be merged
|
||||
; This function can't be merged (for keeping canonical loop structures)
|
||||
define void @d() {
|
||||
entry:
|
||||
br label %BB.tomerge
|
||||
br label %BB.nomerge
|
||||
|
||||
BB.tomerge: ; preds = %Common, %entry
|
||||
BB.nomerge: ; preds = %Common, %entry
|
||||
; This phi has a matching value (0) with below phi (0), so blocks
|
||||
; can be merged.
|
||||
%a = phi i32 [ 1, %entry ], [ 0, %Common ] ; <i32> [#uses=1]
|
||||
br label %Succ
|
||||
|
||||
Succ: ; preds = %Common, %BB.tomerge
|
||||
%b = phi i32 [ %a, %BB.tomerge ], [ 0, %Common ] ; <i32> [#uses=0]
|
||||
%b = phi i32 [ %a, %BB.nomerge ], [ 0, %Common ] ; <i32> [#uses=0]
|
||||
%conde = call i1 @foo( ) ; <i1> [#uses=1]
|
||||
br i1 %conde, label %Common, label %Exit
|
||||
|
||||
Common: ; preds = %Succ
|
||||
%cond = call i1 @foo( ) ; <i1> [#uses=1]
|
||||
br i1 %cond, label %BB.tomerge, label %Succ
|
||||
br i1 %cond, label %BB.nomerge, label %Succ
|
||||
|
||||
Exit: ; preds = %Succ
|
||||
ret void
|
||||
|
@ -235,21 +235,21 @@ Exit: ; preds = %Succ
|
|||
; This function can be merged
|
||||
define void @e() {
|
||||
entry:
|
||||
br label %BB.tomerge
|
||||
br label %Succ
|
||||
|
||||
BB.tomerge: ; preds = %Use, %entry
|
||||
Succ: ; preds = %Use, %entry
|
||||
; This phi is used somewhere else than Succ, but this should not prevent
|
||||
; merging this block
|
||||
%a = phi i32 [ 1, %entry ], [ 0, %Use ] ; <i32> [#uses=1]
|
||||
br label %Succ
|
||||
br label %BB.tomerge
|
||||
|
||||
Succ: ; preds = %BB.tomerge
|
||||
BB.tomerge: ; preds = %Succ
|
||||
%conde = call i1 @foo( ) ; <i1> [#uses=1]
|
||||
br i1 %conde, label %Use, label %Exit
|
||||
|
||||
Use: ; preds = %Succ
|
||||
%cond = call i1 @bar( i32 %a ) ; <i1> [#uses=1]
|
||||
br i1 %cond, label %BB.tomerge, label %Exit
|
||||
br i1 %cond, label %Succ, label %Exit
|
||||
|
||||
Exit: ; preds = %Use, %Succ
|
||||
ret void
|
||||
|
|
|
@ -1306,8 +1306,8 @@ l6:
|
|||
; Speculation depth must be limited to avoid a zero-cost instruction cycle.
|
||||
|
||||
; CHECK-LABEL: @PR26308(
|
||||
; CHECK: cleanup4:
|
||||
; CHECK-NEXT: br label %cleanup4
|
||||
; CHECK: while.body:
|
||||
; CHECK-NEXT: br label %while.body
|
||||
|
||||
define i32 @PR26308(i1 %B, i64 %load) {
|
||||
entry:
|
||||
|
|
Loading…
Reference in New Issue