forked from OSchip/llvm-project
LoopRotate: Also rotate loops with multiple exits.
The old PHI updating code in loop-rotate was replaced with SSAUpdater a while ago; it has no problems with complex PHIs. What had to be fixed is detecting whether a loop was already rotated and updating dominators when multiple exits were present. This change increases overall code size a bit, mostly due to additional loop unrolling opportunities. Passes test-suite and selfhost with -verify-dom-info. Fixes PR7447. Thanks to Andy for the input on the domtree updating code. llvm-svn: 162912
This commit is contained in:
parent
d4a64716ab
commit
afdfdb5cff
|
@ -24,6 +24,7 @@
|
|||
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
||||
#include "llvm/Transforms/Utils/SSAUpdater.h"
|
||||
#include "llvm/Transforms/Utils/ValueMapper.h"
|
||||
#include "llvm/Support/CFG.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
using namespace llvm;
|
||||
|
@ -256,6 +257,7 @@ bool LoopRotate::rotateLoop(Loop *L) {
|
|||
return false;
|
||||
|
||||
BasicBlock *OrigHeader = L->getHeader();
|
||||
BasicBlock *OrigLatch = L->getLoopLatch();
|
||||
|
||||
BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
|
||||
if (BI == 0 || BI->isUnconditional())
|
||||
|
@ -267,13 +269,9 @@ bool LoopRotate::rotateLoop(Loop *L) {
|
|||
if (!L->isLoopExiting(OrigHeader))
|
||||
return false;
|
||||
|
||||
// Updating PHInodes in loops with multiple exits adds complexity.
|
||||
// Keep it simple, and restrict loop rotation to loops with one exit only.
|
||||
// In future, lift this restriction and support for multiple exits if
|
||||
// required.
|
||||
SmallVector<BasicBlock*, 8> ExitBlocks;
|
||||
L->getExitBlocks(ExitBlocks);
|
||||
if (ExitBlocks.size() > 1)
|
||||
// If the loop latch already contains a branch that leaves the loop then the
|
||||
// loop is already rotated.
|
||||
if (OrigLatch == 0 || L->isLoopExiting(OrigLatch))
|
||||
return false;
|
||||
|
||||
// Check size of original header and reject loop if it is very big.
|
||||
|
@ -286,11 +284,10 @@ bool LoopRotate::rotateLoop(Loop *L) {
|
|||
|
||||
// Now, this loop is suitable for rotation.
|
||||
BasicBlock *OrigPreheader = L->getLoopPreheader();
|
||||
BasicBlock *OrigLatch = L->getLoopLatch();
|
||||
|
||||
// If the loop could not be converted to canonical form, it must have an
|
||||
// indirectbr in it, just give up.
|
||||
if (OrigPreheader == 0 || OrigLatch == 0)
|
||||
if (OrigPreheader == 0)
|
||||
return false;
|
||||
|
||||
// Anything ScalarEvolution may know about this loop or the PHI nodes
|
||||
|
@ -298,6 +295,8 @@ bool LoopRotate::rotateLoop(Loop *L) {
|
|||
if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>())
|
||||
SE->forgetLoop(L);
|
||||
|
||||
DEBUG(dbgs() << "LoopRotation: rotating "; L->dump());
|
||||
|
||||
// Find new Loop header. NewHeader is a Header's one and only successor
|
||||
// that is inside loop. Header's other successor is outside the
|
||||
// loop. Otherwise loop is not suitable for rotation.
|
||||
|
@ -408,10 +407,16 @@ bool LoopRotate::rotateLoop(Loop *L) {
|
|||
// Update DominatorTree to reflect the CFG change we just made. Then split
|
||||
// edges as necessary to preserve LoopSimplify form.
|
||||
if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
|
||||
// Since OrigPreheader now has the conditional branch to Exit block, it is
|
||||
// the dominator of Exit.
|
||||
DT->changeImmediateDominator(Exit, OrigPreheader);
|
||||
DT->changeImmediateDominator(NewHeader, OrigPreheader);
|
||||
// Everything that was dominated by the old loop header is now dominated
|
||||
// by the original loop preheader. Conceptually the header was merged
|
||||
// into the preheader, even though we reuse the actual block as a new
|
||||
// loop latch.
|
||||
DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader);
|
||||
SmallVector<DomTreeNode *, 8> HeaderChildren(OrigHeaderNode->begin(),
|
||||
OrigHeaderNode->end());
|
||||
DomTreeNode *OrigPreheaderNode = DT->getNode(OrigPreheader);
|
||||
for (unsigned I = 0, E = HeaderChildren.size(); I != E; ++I)
|
||||
DT->changeImmediateDominator(HeaderChildren[I], OrigPreheaderNode);
|
||||
|
||||
// Update OrigHeader to be dominated by the new header block.
|
||||
DT->changeImmediateDominator(OrigHeader, OrigLatch);
|
||||
|
@ -440,6 +445,46 @@ bool LoopRotate::rotateLoop(Loop *L) {
|
|||
// Update OrigHeader to be dominated by the new header block.
|
||||
DT->changeImmediateDominator(NewHeader, OrigPreheader);
|
||||
DT->changeImmediateDominator(OrigHeader, OrigLatch);
|
||||
|
||||
// Brute force incremental dominator tree update. Call
|
||||
// findNearestCommonDominator on all CFG predecessors of each child of the
|
||||
// original header.
|
||||
DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader);
|
||||
SmallVector<DomTreeNode *, 8> WorkList(OrigHeaderNode->begin(),
|
||||
OrigHeaderNode->end());
|
||||
while (!WorkList.empty()) {
|
||||
DomTreeNode *Node = WorkList.pop_back_val();
|
||||
BasicBlock *BB = Node->getBlock();
|
||||
BasicBlock *NearestDom = 0;
|
||||
for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;
|
||||
++PI) {
|
||||
BasicBlock *Pred = *PI;
|
||||
|
||||
// We have to process predecessors of a node before we touch the
|
||||
// actual node. If one of the predecessors is in our worklist, put it
|
||||
// and the currently processed node on the worklist and go processing
|
||||
// the predecessor.
|
||||
SmallVectorImpl<DomTreeNode *>::iterator I =
|
||||
std::find(WorkList.begin(), WorkList.end(), DT->getNode(Pred));
|
||||
if (I != WorkList.end()) {
|
||||
WorkList.push_back(Node);
|
||||
std::swap(*I, WorkList.back());
|
||||
// The predecessor is now at the end of the worklist.
|
||||
NearestDom = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
// On the first iteration start with Pred, on the other iterations we
|
||||
// narrow it down to the nearest common dominator.
|
||||
if (!NearestDom)
|
||||
NearestDom = Pred;
|
||||
else
|
||||
NearestDom = DT->findNearestCommonDominator(NearestDom, Pred);
|
||||
}
|
||||
|
||||
if (NearestDom)
|
||||
DT->changeImmediateDominator(BB, NearestDom);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -452,6 +497,8 @@ bool LoopRotate::rotateLoop(Loop *L) {
|
|||
// emitted code isn't too gross in this common case.
|
||||
MergeBlockIntoPredecessor(OrigHeader, this);
|
||||
|
||||
DEBUG(dbgs() << "LoopRotation: into "; L->dump());
|
||||
|
||||
++NumRotated;
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,200 @@
|
|||
; RUN: opt -S -loop-rotate < %s -verify-loop-info -verify-dom-info | FileCheck %s
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.8.0"
|
||||
|
||||
; PR7447
|
||||
define i32 @test1([100 x i32]* nocapture %a) nounwind readonly {
|
||||
entry:
|
||||
br label %for.cond
|
||||
|
||||
for.cond: ; preds = %for.cond1, %entry
|
||||
%sum.0 = phi i32 [ 0, %entry ], [ %sum.1, %for.cond1 ]
|
||||
%i.0 = phi i1 [ true, %entry ], [ false, %for.cond1 ]
|
||||
br i1 %i.0, label %for.cond1, label %return
|
||||
|
||||
for.cond1: ; preds = %for.cond, %land.rhs
|
||||
%sum.1 = phi i32 [ %add, %land.rhs ], [ %sum.0, %for.cond ]
|
||||
%i.1 = phi i32 [ %inc, %land.rhs ], [ 0, %for.cond ]
|
||||
%cmp2 = icmp ult i32 %i.1, 100
|
||||
br i1 %cmp2, label %land.rhs, label %for.cond
|
||||
|
||||
land.rhs: ; preds = %for.cond1
|
||||
%conv = zext i32 %i.1 to i64
|
||||
%arrayidx = getelementptr inbounds [100 x i32]* %a, i64 0, i64 %conv
|
||||
%0 = load i32* %arrayidx, align 4
|
||||
%add = add i32 %0, %sum.1
|
||||
%cmp4 = icmp ugt i32 %add, 1000
|
||||
%inc = add i32 %i.1, 1
|
||||
br i1 %cmp4, label %return, label %for.cond1
|
||||
|
||||
return: ; preds = %for.cond, %land.rhs
|
||||
%retval.0 = phi i32 [ 1000, %land.rhs ], [ %sum.0, %for.cond ]
|
||||
ret i32 %retval.0
|
||||
|
||||
; CHECK: @test1
|
||||
; CHECK: for.cond1.preheader:
|
||||
; CHECK: %sum.04 = phi i32 [ 0, %entry ], [ %sum.1.lcssa, %for.cond.loopexit ]
|
||||
; CHECK: br label %for.cond1
|
||||
|
||||
; CHECK: for.cond1:
|
||||
; CHECK: %sum.1 = phi i32 [ %add, %land.rhs ], [ %sum.04, %for.cond1.preheader ]
|
||||
; CHECK: %i.1 = phi i32 [ %inc, %land.rhs ], [ 0, %for.cond1.preheader ]
|
||||
; CHECK: %cmp2 = icmp ult i32 %i.1, 100
|
||||
; CHECK: br i1 %cmp2, label %land.rhs, label %for.cond.loopexit
|
||||
}
|
||||
|
||||
define void @test2(i32 %x) nounwind {
|
||||
entry:
|
||||
br label %for.cond
|
||||
|
||||
for.cond: ; preds = %if.end, %entry
|
||||
%i.0 = phi i32 [ 0, %entry ], [ %inc, %if.end ]
|
||||
%cmp = icmp eq i32 %i.0, %x
|
||||
br i1 %cmp, label %return.loopexit, label %for.body
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
%call = tail call i32 @foo(i32 %i.0) nounwind
|
||||
%tobool = icmp eq i32 %call, 0
|
||||
br i1 %tobool, label %if.end, label %a
|
||||
|
||||
if.end: ; preds = %for.body
|
||||
%call1 = tail call i32 @foo(i32 42) nounwind
|
||||
%inc = add i32 %i.0, 1
|
||||
br label %for.cond
|
||||
|
||||
a: ; preds = %for.body
|
||||
%call2 = tail call i32 @bar(i32 1) nounwind
|
||||
br label %return
|
||||
|
||||
return.loopexit: ; preds = %for.cond
|
||||
br label %return
|
||||
|
||||
return: ; preds = %return.loopexit, %a
|
||||
ret void
|
||||
|
||||
; CHECK: @test2
|
||||
; CHECK: if.end:
|
||||
; CHECK: %inc = add i32 %i.02, 1
|
||||
; CHECK: %cmp = icmp eq i32 %inc, %x
|
||||
; CHECK: br i1 %cmp, label %for.cond.return.loopexit_crit_edge, label %for.body
|
||||
}
|
||||
|
||||
declare i32 @foo(i32)
|
||||
|
||||
declare i32 @bar(i32)
|
||||
|
||||
@_ZTIi = external constant i8*
|
||||
|
||||
; Verify dominators.
|
||||
define void @test3(i32 %x) {
|
||||
entry:
|
||||
%cmp2 = icmp eq i32 0, %x
|
||||
br i1 %cmp2, label %try.cont.loopexit, label %for.body.lr.ph
|
||||
|
||||
for.body.lr.ph: ; preds = %entry
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body.lr.ph, %for.inc
|
||||
%i.03 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
|
||||
invoke void @_Z3fooi(i32 %i.03)
|
||||
to label %for.inc unwind label %lpad
|
||||
|
||||
for.inc: ; preds = %for.body
|
||||
%inc = add i32 %i.03, 1
|
||||
%cmp = icmp eq i32 %inc, %x
|
||||
br i1 %cmp, label %for.cond.try.cont.loopexit_crit_edge, label %for.body
|
||||
|
||||
lpad: ; preds = %for.body
|
||||
%0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
|
||||
catch i8* bitcast (i8** @_ZTIi to i8*)
|
||||
%1 = extractvalue { i8*, i32 } %0, 0
|
||||
%2 = extractvalue { i8*, i32 } %0, 1
|
||||
%3 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) nounwind
|
||||
%matches = icmp eq i32 %2, %3
|
||||
br i1 %matches, label %catch, label %eh.resume
|
||||
|
||||
catch: ; preds = %lpad
|
||||
%4 = tail call i8* @__cxa_begin_catch(i8* %1) nounwind
|
||||
br i1 true, label %invoke.cont2.loopexit, label %for.body.i.lr.ph
|
||||
|
||||
for.body.i.lr.ph: ; preds = %catch
|
||||
br label %for.body.i
|
||||
|
||||
for.body.i: ; preds = %for.body.i.lr.ph, %for.inc.i
|
||||
%i.0.i1 = phi i32 [ 0, %for.body.i.lr.ph ], [ %inc.i, %for.inc.i ]
|
||||
invoke void @_Z3fooi(i32 %i.0.i1)
|
||||
to label %for.inc.i unwind label %lpad.i
|
||||
|
||||
for.inc.i: ; preds = %for.body.i
|
||||
%inc.i = add i32 %i.0.i1, 1
|
||||
%cmp.i = icmp eq i32 %inc.i, 0
|
||||
br i1 %cmp.i, label %for.cond.i.invoke.cont2.loopexit_crit_edge, label %for.body.i
|
||||
|
||||
lpad.i: ; preds = %for.body.i
|
||||
%5 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
|
||||
catch i8* bitcast (i8** @_ZTIi to i8*)
|
||||
%6 = extractvalue { i8*, i32 } %5, 0
|
||||
%7 = extractvalue { i8*, i32 } %5, 1
|
||||
%matches.i = icmp eq i32 %7, %3
|
||||
br i1 %matches.i, label %catch.i, label %lpad1.body
|
||||
|
||||
catch.i: ; preds = %lpad.i
|
||||
%8 = tail call i8* @__cxa_begin_catch(i8* %6) nounwind
|
||||
invoke void @_Z3barj(i32 0)
|
||||
to label %invoke.cont2.i unwind label %lpad1.i
|
||||
|
||||
invoke.cont2.i: ; preds = %catch.i
|
||||
tail call void @__cxa_end_catch() nounwind
|
||||
br label %invoke.cont2
|
||||
|
||||
lpad1.i: ; preds = %catch.i
|
||||
%9 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
|
||||
cleanup
|
||||
%10 = extractvalue { i8*, i32 } %9, 0
|
||||
%11 = extractvalue { i8*, i32 } %9, 1
|
||||
tail call void @__cxa_end_catch() nounwind
|
||||
br label %lpad1.body
|
||||
|
||||
for.cond.i.invoke.cont2.loopexit_crit_edge: ; preds = %for.inc.i
|
||||
br label %invoke.cont2.loopexit
|
||||
|
||||
invoke.cont2.loopexit: ; preds = %for.cond.i.invoke.cont2.loopexit_crit_edge, %catch
|
||||
br label %invoke.cont2
|
||||
|
||||
invoke.cont2: ; preds = %invoke.cont2.loopexit, %invoke.cont2.i
|
||||
tail call void @__cxa_end_catch() nounwind
|
||||
br label %try.cont
|
||||
|
||||
for.cond.try.cont.loopexit_crit_edge: ; preds = %for.inc
|
||||
br label %try.cont.loopexit
|
||||
|
||||
try.cont.loopexit: ; preds = %for.cond.try.cont.loopexit_crit_edge, %entry
|
||||
br label %try.cont
|
||||
|
||||
try.cont: ; preds = %try.cont.loopexit, %invoke.cont2
|
||||
ret void
|
||||
|
||||
lpad1.body: ; preds = %lpad1.i, %lpad.i
|
||||
%exn.slot.0.i = phi i8* [ %10, %lpad1.i ], [ %6, %lpad.i ]
|
||||
%ehselector.slot.0.i = phi i32 [ %11, %lpad1.i ], [ %7, %lpad.i ]
|
||||
tail call void @__cxa_end_catch() nounwind
|
||||
br label %eh.resume
|
||||
|
||||
eh.resume: ; preds = %lpad1.body, %lpad
|
||||
%exn.slot.0 = phi i8* [ %exn.slot.0.i, %lpad1.body ], [ %1, %lpad ]
|
||||
%ehselector.slot.0 = phi i32 [ %ehselector.slot.0.i, %lpad1.body ], [ %2, %lpad ]
|
||||
%lpad.val = insertvalue { i8*, i32 } undef, i8* %exn.slot.0, 0
|
||||
%lpad.val5 = insertvalue { i8*, i32 } %lpad.val, i32 %ehselector.slot.0, 1
|
||||
resume { i8*, i32 } %lpad.val5
|
||||
}
|
||||
|
||||
declare void @_Z3fooi(i32)
|
||||
|
||||
declare i32 @__gxx_personality_v0(...)
|
||||
|
||||
declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone
|
||||
|
||||
declare i8* @__cxa_begin_catch(i8*)
|
||||
|
||||
declare void @__cxa_end_catch()
|
Loading…
Reference in New Issue