[LV] Allow reductions that have several uses outside the loop

We currently check whether a reduction has a single outside user. We don't
really need to require that - we just need to make sure a single value is
used externally. The number of external users of that value shouldn't actually
matter.

Differential Revision: https://reviews.llvm.org/D28830

llvm-svn: 292424
This commit is contained in:
Michael Kuperstein 2017-01-18 19:02:52 +00:00
parent 2ceeb30eb6
commit 7cefb409b0
3 changed files with 60 additions and 10 deletions

View File

@ -230,7 +230,9 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
// - PHI:
// - All uses of the PHI must be the reduction (safe).
// - Otherwise, not safe.
// - By one instruction outside of the loop (safe).
// - By instructions outside of the loop (safe).
// * One value may have several outside users, but all outside
// uses must be of the same value.
// - By further instructions outside of the loop (not safe).
// - By an instruction that is not part of the reduction (not safe).
// This is either:
@ -297,10 +299,15 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
// Check if we found the exit user.
BasicBlock *Parent = UI->getParent();
if (!TheLoop->contains(Parent)) {
// Exit if you find multiple outside users or if the header phi node is
// being used. In this case the user uses the value of the previous
// iteration, in which case we would lose "VF-1" iterations of the
// reduction operation if we vectorize.
// If we already know this instruction is used externally, move on to
// the next user.
if (ExitInstruction == Cur)
continue;
// Exit if you find multiple values used outside or if the header phi
// node is being used. In this case the user uses the value of the
// previous iteration, in which case we would lose "VF-1" iterations of
// the reduction operation if we vectorize.
if (ExitInstruction != nullptr || Cur == Phi)
return false;

View File

@ -4091,13 +4091,10 @@ void InnerLoopVectorizer::vectorizeLoop() {
// we already fixed them.
assert(LCSSAPhi->getNumIncomingValues() < 3 && "Invalid LCSSA PHI");
// We found our reduction value exit-PHI. Update it with the
// We found a reduction value exit-PHI. Update it with the
// incoming bypass edge.
if (LCSSAPhi->getIncomingValue(0) == LoopExitInst) {
// Add an edge coming from the bypass.
if (LCSSAPhi->getIncomingValue(0) == LoopExitInst)
LCSSAPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock);
break;
}
} // end of the LCSSA phi scan.
// Fix the scalar loop reduction variable with the incoming reduction sum

View File

@ -493,3 +493,49 @@ exit:
%inc.2 = add nsw i32 %inc511.1.inc4.1, 2
ret i32 %inc.2
}
;CHECK-LABEL: @reduction_sum_multiuse(
;CHECK: phi <4 x i32>
;CHECK: load <4 x i32>
;CHECK: add <4 x i32>
;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
;CHECK: add <4 x i32>
;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
;CHECK: add <4 x i32>
;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
;CHECK: %sum.lcssa = phi i32 [ %[[SCALAR:.*]], %.lr.ph ], [ %[[VECTOR:.*]], %middle.block ]
;CHECK: %sum.copy = phi i32 [ %[[SCALAR]], %.lr.ph ], [ %[[VECTOR]], %middle.block ]
;CHECK: ret i32
; Integer add reduction whose final value (%9) has two users outside the loop:
; the phis %sum.lcssa and %sum.copy in the exit block. Every outside use is of
; the same value, %9; the header phi %sum.02 is not used outside the loop.
;
; Per iteration i the loop accumulates trunc(i) + A[i] + B[i] into the running
; sum. The function returns %f1 + %f2, i.e. the final sum added to itself, or
; 0 when the entry guard (%n > 0) fails and the loop is skipped.
define i32 @reduction_sum_multiuse(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) {
; Entry block (%0): skip the loop entirely unless %n is positive.
%1 = icmp sgt i32 %n, 0
br i1 %1, label %.lr.ph.preheader, label %end
.lr.ph.preheader: ; preds = %0
br label %.lr.ph
.lr.ph: ; preds = %.lr.ph.preheader, %.lr.ph
; Induction variable and reduction phi, both starting at 0.
%indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
%sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %.lr.ph.preheader ]
; Load A[i] and B[i].
%2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
%3 = load i32, i32* %2, align 4
%4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
%5 = load i32, i32* %4, align 4
; Reduction chain: sum + trunc(i) + A[i] + B[i]. %9 feeds back into %sum.02
; and is the only value of the chain that is used outside the loop.
%6 = trunc i64 %indvars.iv to i32
%7 = add i32 %sum.02, %6
%8 = add i32 %7, %3
%9 = add i32 %8, %5
; Advance the induction variable and leave once its truncated value equals %n.
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
br i1 %exitcond, label %._crit_edge, label %.lr.ph
._crit_edge: ; preds = %.lr.ph
; Two separate exit phis of the same reduction result %9.
%sum.lcssa = phi i32 [ %9, %.lr.ph ]
%sum.copy = phi i32 [ %9, %.lr.ph ]
br label %end
end: ; preds = %0, %._crit_edge
; Each phi yields 0 when the loop was skipped, otherwise one copy of the sum.
%f1 = phi i32 [ 0, %0 ], [ %sum.lcssa, %._crit_edge ]
%f2 = phi i32 [ 0, %0 ], [ %sum.copy, %._crit_edge ]
%final = add i32 %f1, %f2
ret i32 %final
}