forked from OSchip/llvm-project
Add support for reduction variables when IF-conversion is enabled.
llvm-svn: 169288
This commit is contained in:
parent
dfd779e108
commit
a10b311aec
|
@ -1133,8 +1133,8 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
|
|||
// Reductions do not have to start at zero. They can start with
|
||||
// any loop-invariant value.
|
||||
VecRdxPhi->addIncoming(VectorStart, VecPreheader);
|
||||
unsigned SelfEdgeIdx = (RdxPhi)->getBasicBlockIndex(LoopScalarBody);
|
||||
Value *Val = getVectorValue(RdxPhi->getIncomingValue(SelfEdgeIdx));
|
||||
Value *Val =
|
||||
getVectorValue(RdxPhi->getIncomingValueForBlock(OrigLoop->getLoopLatch()));
|
||||
VecRdxPhi->addIncoming(Val, LoopVectorBody);
|
||||
|
||||
// Before each round, move the insertion point right between
|
||||
|
@ -1201,8 +1201,11 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
|
|||
|
||||
// Fix the scalar loop reduction variable with the incoming reduction sum
|
||||
// from the vector body and from the backedge value.
|
||||
int IncomingEdgeBlockIdx = (RdxPhi)->getBasicBlockIndex(LoopScalarBody);
|
||||
int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1); // The other block.
|
||||
int IncomingEdgeBlockIdx =
|
||||
(RdxPhi)->getBasicBlockIndex(OrigLoop->getLoopLatch());
|
||||
assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index");
|
||||
// Pick the other block.
|
||||
int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1);
|
||||
(RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, Scalar0);
|
||||
(RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, RdxDesc.LoopExitInstr);
|
||||
}// end of for each redux variable.
|
||||
|
@ -1961,11 +1964,13 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
|
|||
if (Phi->getNumIncomingValues() != 2)
|
||||
return false;
|
||||
|
||||
// Find the possible incoming reduction variable.
|
||||
BasicBlock *BB = Phi->getParent();
|
||||
int SelfEdgeIdx = Phi->getBasicBlockIndex(BB);
|
||||
int InEdgeBlockIdx = (SelfEdgeIdx ? 0 : 1); // The other entry.
|
||||
Value *RdxStart = Phi->getIncomingValue(InEdgeBlockIdx);
|
||||
// Reduction variables are only found in the loop header block.
|
||||
if (Phi->getParent() != TheLoop->getHeader())
|
||||
return false;
|
||||
|
||||
// Obtain the reduction start value from the value that comes from the loop
|
||||
// preheader.
|
||||
Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());
|
||||
|
||||
// ExitInstruction is the single value which is used outside the loop.
|
||||
// We only allow for a single reduction value to be used outside the loop.
|
||||
|
@ -2003,9 +2008,17 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
|
|||
FoundStartPHI = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
// We allow in-loop PHINodes which are not the original reduction PHI
|
||||
// node. If this PHI is the only user of Iter (happens in an IF with no ELSE
|
||||
// structure) then don't skip this PHI.
|
||||
if (isa<PHINode>(U) && U->getParent() != TheLoop->getHeader() &&
|
||||
TheLoop->contains(U->getParent()) && Iter->getNumUses() > 1)
|
||||
continue;
|
||||
|
||||
// Check if we found the exit user.
|
||||
BasicBlock *Parent = U->getParent();
|
||||
if (Parent != BB) {
|
||||
if (!TheLoop->contains(Parent)) {
|
||||
// We must have a single exit instruction.
|
||||
if (ExitInstruction != 0)
|
||||
return false;
|
||||
|
|
|
@ -58,3 +58,51 @@ if.end:
|
|||
for.end:
|
||||
ret i32 undef
|
||||
}
|
||||
|
||||
|
||||
|
||||
; int func(int *A, int n) {
|
||||
; unsigned sum = 0;
|
||||
; for (int i = 0; i < n; ++i)
|
||||
; if (A[i] > 30)
|
||||
; sum += A[i] + 2;
|
||||
;
|
||||
; return sum;
|
||||
; }
|
||||
|
||||
;CHECK: @reduction_func
|
||||
;CHECK: load <4 x i32>
|
||||
;CHECK: icmp sgt <4 x i32>
|
||||
;CHECK: add <4 x i32>
|
||||
;CHECK: select <4 x i1>
|
||||
;CHECK: ret i32
|
||||
define i32 @reduction_func(i32* nocapture %A, i32 %n) nounwind uwtable readonly ssp {
|
||||
entry:
|
||||
%cmp10 = icmp sgt i32 %n, 0
|
||||
br i1 %cmp10, label %for.body, label %for.end
|
||||
|
||||
for.body: ; preds = %entry, %for.inc
|
||||
%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
|
||||
%sum.011 = phi i32 [ %sum.1, %for.inc ], [ 0, %entry ]
|
||||
%arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv
|
||||
%0 = load i32* %arrayidx, align 4
|
||||
%cmp1 = icmp sgt i32 %0, 30
|
||||
br i1 %cmp1, label %if.then, label %for.inc
|
||||
|
||||
if.then: ; preds = %for.body
|
||||
%add = add i32 %sum.011, 2
|
||||
%add4 = add i32 %add, %0
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %for.body, %if.then
|
||||
%sum.1 = phi i32 [ %add4, %if.then ], [ %sum.011, %for.body ]
|
||||
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, %n
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.inc, %entry
|
||||
%sum.0.lcssa = phi i32 [ 0, %entry ], [ %sum.1, %for.inc ]
|
||||
ret i32 %sum.0.lcssa
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue