forked from OSchip/llvm-project
[LoopVectorize] Classify pointer induction updates as scalar only if they have one use
collectLoopScalars collects pointer induction updates in ScalarPtrs, assuming that the instruction will be scalar after vectorization. This may crash later in VPReplicateRecipe::execute() if there is another user of the instruction other than the Phi node which needs to be widened. This changes collectLoopScalars so that if there are any other users of Update other than a Phi node, it is not added to ScalarPtrs. Reviewed By: david-arm, fhahn Differential Revision: https://reviews.llvm.org/D111294
This commit is contained in:
parent
67b10532c6
commit
1439ef1a3f
|
@ -5130,8 +5130,14 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
|
||||||
|
|
||||||
Instruction *Update = cast<Instruction>(
|
Instruction *Update = cast<Instruction>(
|
||||||
cast<PHINode>(Ptr)->getIncomingValueForBlock(Latch));
|
cast<PHINode>(Ptr)->getIncomingValueForBlock(Latch));
|
||||||
ScalarPtrs.insert(Update);
|
|
||||||
return;
|
// If there is more than one user of Update (Ptr), we shouldn't assume it
|
||||||
|
// will be scalar after vectorisation as other users of the instruction
|
||||||
|
// may require widening. Otherwise, add it to ScalarPtrs.
|
||||||
|
if (Update->hasOneUse() && cast<Value>(*Update->user_begin()) == Ptr) {
|
||||||
|
ScalarPtrs.insert(Update);
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// We only care about bitcast and getelementptr instructions contained in
|
// We only care about bitcast and getelementptr instructions contained in
|
||||||
// the loop.
|
// the loop.
|
||||||
|
|
|
@ -116,6 +116,51 @@ exit: ; preds = %loop.body
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define void @pointer_induction(i8* noalias %start, i64 %N) {
|
||||||
|
; CHECK-LABEL: @pointer_induction(
|
||||||
|
; CHECK: vector.ph:
|
||||||
|
; CHECK: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i8*> poison, i8* [[START:%.*]], i32 0
|
||||||
|
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i8*> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i8*> poison, <vscale x 2 x i32> zeroinitializer
|
||||||
|
; CHECK: vector.body:
|
||||||
|
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||||
|
; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
|
||||||
|
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[INDEX1]], i32 0
|
||||||
|
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
|
||||||
|
; CHECK-NEXT: [[TMP6:%.*]] = add <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 0, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), [[TMP5]]
|
||||||
|
; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP6]]
|
||||||
|
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[START]], <vscale x 2 x i64> [[TMP7]]
|
||||||
|
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], 0
|
||||||
|
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, i8* [[START]], i64 [[TMP8]]
|
||||||
|
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], 1
|
||||||
|
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, i8* [[START]], i64 [[TMP9]]
|
||||||
|
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], 0
|
||||||
|
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[NEXT_GEP3]], i32 0
|
||||||
|
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP11]] to <vscale x 2 x i8>*
|
||||||
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP12]], align 1
|
||||||
|
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, <vscale x 2 x i8*> [[NEXT_GEP]], i64 1
|
||||||
|
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq <vscale x 2 x i8*> [[TMP13]], [[BROADCAST_SPLAT]]
|
||||||
|
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
|
||||||
|
; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 2
|
||||||
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], [[TMP16]]
|
||||||
|
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||||
|
; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
|
||||||
|
;
|
||||||
|
entry:
|
||||||
|
br label %for.body
|
||||||
|
|
||||||
|
for.body:
|
||||||
|
%ptr.phi = phi i8* [ %ptr.phi.next, %for.body ], [ %start, %entry ]
|
||||||
|
%index = phi i64 [ %index_nxt, %for.body ], [ 0, %entry ]
|
||||||
|
%index_nxt = add i64 %index, 1
|
||||||
|
%0 = load i8, i8* %ptr.phi, align 1
|
||||||
|
%ptr.phi.next = getelementptr inbounds i8, i8* %ptr.phi, i64 1
|
||||||
|
%cmp.i.not = icmp eq i8* %ptr.phi.next, %start
|
||||||
|
%cmp = icmp ult i64 %index, %N
|
||||||
|
br i1 %cmp, label %for.body, label %end, !llvm.loop !0
|
||||||
|
|
||||||
|
end:
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
attributes #0 = {"target-features"="+sve"}
|
attributes #0 = {"target-features"="+sve"}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue