forked from OSchip/llvm-project
[LV] Allow reductions that have several uses outside the loop
We currently check whether a reduction has a single outside user. We don't really need to require that - we just need to make sure a single value is used externally. The number of external users of that value shouldn't actually matter. Differential Revision: https://reviews.llvm.org/D28830 llvm-svn: 292424
This commit is contained in:
parent
2ceeb30eb6
commit
7cefb409b0
|
@ -230,7 +230,9 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
|
|||
// - PHI:
|
||||
// - All uses of the PHI must be the reduction (safe).
|
||||
// - Otherwise, not safe.
|
||||
// - By one instruction outside of the loop (safe).
|
||||
// - By instructions outside of the loop (safe).
|
||||
// * One value may have several outside users, but all outside
|
||||
// uses must be of the same value.
|
||||
// - By further instructions outside of the loop (not safe).
|
||||
// - By an instruction that is not part of the reduction (not safe).
|
||||
// This is either:
|
||||
|
@ -297,10 +299,15 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
|
|||
// Check if we found the exit user.
|
||||
BasicBlock *Parent = UI->getParent();
|
||||
if (!TheLoop->contains(Parent)) {
|
||||
// Exit if you find multiple outside users or if the header phi node is
|
||||
// being used. In this case the user uses the value of the previous
|
||||
// iteration, in which case we would lose "VF-1" iterations of the
|
||||
// reduction operation if we vectorize.
|
||||
// If we already know this instruction is used externally, move on to
|
||||
// the next user.
|
||||
if (ExitInstruction == Cur)
|
||||
continue;
|
||||
|
||||
// Exit if you find multiple values used outside or if the header phi
|
||||
// node is being used. In this case the user uses the value of the
|
||||
// previous iteration, in which case we would lose "VF-1" iterations of
|
||||
// the reduction operation if we vectorize.
|
||||
if (ExitInstruction != nullptr || Cur == Phi)
|
||||
return false;
|
||||
|
||||
|
|
|
@ -4091,13 +4091,10 @@ void InnerLoopVectorizer::vectorizeLoop() {
|
|||
// we already fixed them.
|
||||
assert(LCSSAPhi->getNumIncomingValues() < 3 && "Invalid LCSSA PHI");
|
||||
|
||||
// We found our reduction value exit-PHI. Update it with the
|
||||
// We found a reduction value exit-PHI. Update it with the
|
||||
// incoming bypass edge.
|
||||
if (LCSSAPhi->getIncomingValue(0) == LoopExitInst) {
|
||||
// Add an edge coming from the bypass.
|
||||
if (LCSSAPhi->getIncomingValue(0) == LoopExitInst)
|
||||
LCSSAPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock);
|
||||
break;
|
||||
}
|
||||
} // end of the LCSSA phi scan.
|
||||
|
||||
// Fix the scalar loop reduction variable with the incoming reduction sum
|
||||
|
|
|
@ -493,3 +493,49 @@ exit:
|
|||
%inc.2 = add nsw i32 %inc511.1.inc4.1, 2
|
||||
ret i32 %inc.2
|
||||
}
|
||||
|
||||
;CHECK-LABEL: @reduction_sum_multiuse(
|
||||
;CHECK: phi <4 x i32>
|
||||
;CHECK: load <4 x i32>
|
||||
;CHECK: add <4 x i32>
|
||||
;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
|
||||
;CHECK: add <4 x i32>
|
||||
;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
;CHECK: add <4 x i32>
|
||||
;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
|
||||
;CHECK: %sum.lcssa = phi i32 [ %[[SCALAR:.*]], %.lr.ph ], [ %[[VECTOR:.*]], %middle.block ]
|
||||
;CHECK: %sum.copy = phi i32 [ %[[SCALAR]], %.lr.ph ], [ %[[VECTOR]], %middle.block ]
|
||||
;CHECK: ret i32
|
||||
; Regression input: an integer add reduction whose final value (%9) is used
; outside the loop by more than one instruction. Both outside users are
; single-entry phis in the exit block that read the same value %9, and their
; results are added together in %end. When %n is not greater than zero the
; loop is skipped and both phis in %end take the constant 0.
define i32 @reduction_sum_multiuse(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) {
|
||||
%1 = icmp sgt i32 %n, 0 ; guard: only enter the loop when %n > 0
|
||||
br i1 %1, label %.lr.ph.preheader, label %end
|
||||
.lr.ph.preheader: ; preds = %0
|
||||
br label %.lr.ph
|
||||
|
||||
.lr.ph: ; preds = %.lr.ph.preheader, %.lr.ph
|
||||
%indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
|
||||
%sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %.lr.ph.preheader ] ; reduction phi, starts at 0
|
||||
%2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
|
||||
%3 = load i32, i32* %2, align 4
|
||||
|
||||
%4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
|
||||
%5 = load i32, i32* %4, align 4
|
||||
%6 = trunc i64 %indvars.iv to i32
|
||||
%7 = add i32 %sum.02, %6 ; sum + i
|
||||
%8 = add i32 %7, %3 ; ... + A[i]
|
||||
%9 = add i32 %8, %5 ; ... + B[i]; the value fed back to %sum.02 and used outside the loop
|
||||
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, %n
|
||||
br i1 %exitcond, label %._crit_edge, label %.lr.ph
|
||||
|
||||
._crit_edge: ; preds = %.lr.ph
|
||||
%sum.lcssa = phi i32 [ %9, %.lr.ph ] ; first outside user of %9
|
||||
%sum.copy = phi i32 [ %9, %.lr.ph ] ; second outside user of the same value %9
|
||||
br label %end
|
||||
|
||||
end: ; preds = %._crit_edge, %0
|
||||
%f1 = phi i32 [ 0, %0 ], [ %sum.lcssa, %._crit_edge ]
|
||||
%f2 = phi i32 [ 0, %0 ], [ %sum.copy, %._crit_edge ]
|
||||
%final = add i32 %f1, %f2 ; combines both outside uses so neither exit phi is dead
|
||||
ret i32 %final
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue