forked from OSchip/llvm-project
[LoopVectorize][SVE] Fix crash when vectorising FP negation
This patch fixes a crash encountered when vectorising the following loop:

  void foo(float *dst, float *src, long long n) {
    for (long long i = 0; i < n; i++)
      dst[i] = -src[i];
  }

using scalable vectors. I've added a test to Transforms/LoopVectorize/AArch64/sve-basic-vec.ll and cleaned up the other tests in the same file.

Differential Revision: https://reviews.llvm.org/D98054
This commit is contained in:
parent
045781a5ce
commit
00e65f3345
|
@ -7549,7 +7549,6 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
|
|||
Op2VK, TargetTransformInfo::OP_None, Op2VP, Operands, I);
|
||||
}
|
||||
case Instruction::FNeg: {
|
||||
assert(!VF.isScalable() && "VF is assumed to be non scalable.");
|
||||
return TTI.getArithmeticInstrCost(
|
||||
I->getOpcode(), VectorTy, CostKind, TargetTransformInfo::OK_AnyValue,
|
||||
TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None,
|
||||
|
|
|
@ -14,8 +14,7 @@ define void @cmpsel_i32(i32* noalias nocapture %a, i32* noalias nocapture readon
|
|||
; CHECK: store <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32>* {{.*}}, align 4
|
||||
;
|
||||
entry:
|
||||
%cmp7 = icmp sgt i64 %n, 0
|
||||
br i1 %cmp7, label %for.body, label %for.end
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %entry, %for.body
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
|
@ -46,8 +45,7 @@ define void @cmpsel_f32(float* noalias nocapture %a, float* noalias nocapture re
|
|||
; CHECK: store <vscale x 4 x float> [[TMP2]], <vscale x 4 x float>* {{.*}}, align 4
|
||||
|
||||
entry:
|
||||
%cmp8 = icmp sgt i64 %n, 0
|
||||
br i1 %cmp8, label %for.body, label %for.end
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %entry, %for.body
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
|
@ -59,7 +57,33 @@ for.body: ; preds = %entry, %for.body
|
|||
store float %conv, float* %arrayidx3, align 4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%exitcond.not = icmp eq i64 %indvars.iv.next, %n
|
||||
br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !6
|
||||
br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
|
||||
|
||||
for.end: ; preds = %for.body, %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fneg_f32(float* noalias nocapture %a, float* noalias nocapture readonly %b, i64 %n) {
|
||||
; CHECK-LABEL: @fneg_f32(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK: vector.body:
|
||||
; CHECK: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, <vscale x 4 x float>* {{.*}}, align 4
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fneg <vscale x 4 x float> [[WIDE_LOAD]]
|
||||
; CHECK: store <vscale x 4 x float> [[TMP1]], <vscale x 4 x float>* {{.*}}, align 4
|
||||
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %entry, %for.body
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
%arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
|
||||
%0 = load float, float* %arrayidx, align 4
|
||||
%fneg = fneg float %0
|
||||
%arrayidx3 = getelementptr inbounds float, float* %a, i64 %indvars.iv
|
||||
store float %fneg, float* %arrayidx3, align 4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%exitcond.not = icmp eq i64 %indvars.iv.next, %n
|
||||
br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
|
||||
|
||||
for.end: ; preds = %for.body, %entry
|
||||
ret void
|
||||
|
@ -71,4 +95,3 @@ for.end: ; preds = %for.body, %entry
|
|||
!3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
|
||||
!4 = !{!"llvm.loop.interleave.count", i32 1}
|
||||
!5 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
!6 = distinct !{!6, !1, !2, !3, !4, !5}
|
||||
|
|
Loading…
Reference in New Issue