[LoopVectorize] Classify pointer induction updates as scalar only if they have one use

collectLoopScalars collects pointer induction updates in ScalarPtrs, assuming
that the instruction will be scalar after vectorization. This may crash later
in VPReplicateRecipe::execute() if there there is another user of the instruction
other than the Phi node which needs to be widened.

This changes collectLoopScalars so that if there are any other users of
Update other than a Phi node, it is not added to ScalarPtrs.

Reviewed By: david-arm, fhahn

Differential Revision: https://reviews.llvm.org/D111294
This commit is contained in:
Kerry McLaughlin 2021-10-11 11:06:36 +01:00
parent 67b10532c6
commit 1439ef1a3f
2 changed files with 53 additions and 2 deletions

View File

@ -5130,8 +5130,14 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
Instruction *Update = cast<Instruction>(
cast<PHINode>(Ptr)->getIncomingValueForBlock(Latch));
ScalarPtrs.insert(Update);
return;
// If there is more than one user of Update (Ptr), we shouldn't assume it
// will be scalar after vectorisation as other users of the instruction
// may require widening. Otherwise, add it to ScalarPtrs.
if (Update->hasOneUse() && cast<Value>(*Update->user_begin()) == Ptr) {
ScalarPtrs.insert(Update);
return;
}
}
// We only care about bitcast and getelementptr instructions contained in
// the loop.

View File

@ -116,6 +116,51 @@ exit: ; preds = %loop.body
ret void
}
define void @pointer_induction(i8* noalias %start, i64 %N) {
; CHECK-LABEL: @pointer_induction(
; CHECK: vector.ph:
; CHECK: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i8*> poison, i8* [[START:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i8*> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i8*> poison, <vscale x 2 x i32> zeroinitializer
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[INDEX1]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = add <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 0, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP6]]
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[START]], <vscale x 2 x i64> [[TMP7]]
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], 0
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, i8* [[START]], i64 [[TMP8]]
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], 1
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, i8* [[START]], i64 [[TMP9]]
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], 0
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[NEXT_GEP3]], i32 0
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP11]] to <vscale x 2 x i8>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP12]], align 1
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, <vscale x 2 x i8*> [[NEXT_GEP]], i64 1
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq <vscale x 2 x i8*> [[TMP13]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], [[TMP16]]
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
;
entry:
br label %for.body
for.body:
%ptr.phi = phi i8* [ %ptr.phi.next, %for.body ], [ %start, %entry ]
%index = phi i64 [ %index_nxt, %for.body ], [ 0, %entry ]
%index_nxt = add i64 %index, 1
%0 = load i8, i8* %ptr.phi, align 1
%ptr.phi.next = getelementptr inbounds i8, i8* %ptr.phi, i64 1
%cmp.i.not = icmp eq i8* %ptr.phi.next, %start
%cmp = icmp ult i64 %index, %N
br i1 %cmp, label %for.body, label %end, !llvm.loop !0
end:
ret void
}
attributes #0 = {"target-features"="+sve"}