forked from OSchip/llvm-project
[LoopVectorize] Loop vectorization for minimum and maximum
Summary: Depends on D52766.
Reviewers: aheejin, dschuff
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D52767
llvm-svn: 344816
This commit is contained in:
parent
f95f763ea5
commit
8a91cf1cc5
|
@ -54,6 +54,8 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
|
|||
case Intrinsic::fabs:
|
||||
case Intrinsic::minnum:
|
||||
case Intrinsic::maxnum:
|
||||
case Intrinsic::minimum:
|
||||
case Intrinsic::maximum:
|
||||
case Intrinsic::copysign:
|
||||
case Intrinsic::floor:
|
||||
case Intrinsic::ceil:
|
||||
|
|
|
@ -1247,3 +1247,59 @@ for.body: ; preds = %entry, %for.body
|
|||
for.end: ; preds = %for.body, %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
declare float @llvm.minimum.f32(float, float) nounwind readnone
|
||||
|
||||
;CHECK-LABEL: @minimum_f32(
|
||||
;CHECK: llvm.minimum.v4f32
|
||||
;CHECK: ret void
|
||||
; Element-wise loop: x[i] = llvm.minimum.f32(y[i], z[i]) for i = 0 .. n-1.
; The FileCheck directives preceding this function only look for a call to
; llvm.minimum.v4f32 followed by a void return, so the local value and label
; names used here are not matched by the test.
define void @minimum_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z) nounwind uwtable {
guard:
  ; Skip the loop entirely when n <= 0.
  %has.work = icmp sgt i32 %n, 0
  br i1 %has.work, label %loop, label %done

loop:                                             ; preds = %guard, %loop
  %i = phi i64 [ %i.next, %loop ], [ 0, %guard ]
  ; Load the two operands y[i] and z[i].
  %y.addr = getelementptr inbounds float, float* %y, i64 %i
  %y.val = load float, float* %y.addr, align 4
  %z.addr = getelementptr inbounds float, float* %z, i64 %i
  %z.val = load float, float* %z.addr, align 4
  %min = tail call float @llvm.minimum.f32(float %y.val, float %z.val) nounwind readnone
  ; Store the result to x[i].
  %x.addr = getelementptr inbounds float, float* %x, i64 %i
  store float %min, float* %x.addr, align 4
  ; Advance the 64-bit index; the exit test compares its low 32 bits with n.
  %i.next = add i64 %i, 1
  %i.next.lo = trunc i64 %i.next to i32
  %finished = icmp eq i32 %i.next.lo, %n
  br i1 %finished, label %done, label %loop

done:                                             ; preds = %loop, %guard
  ret void
}
|
||||
|
||||
declare float @llvm.maximum.f32(float, float) nounwind readnone
|
||||
|
||||
;CHECK-LABEL: @maximum_f32(
|
||||
;CHECK: llvm.maximum.v4f32
|
||||
;CHECK: ret void
|
||||
; Element-wise loop: x[i] = llvm.maximum.f32(y[i], z[i]) for i = 0 .. n-1.
; The FileCheck directives preceding this function only look for a call to
; llvm.maximum.v4f32 followed by a void return, so the local value and label
; names used here are not matched by the test.
define void @maximum_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z) nounwind uwtable {
guard:
  ; Skip the loop entirely when n <= 0.
  %has.work = icmp sgt i32 %n, 0
  br i1 %has.work, label %loop, label %done

loop:                                             ; preds = %guard, %loop
  %i = phi i64 [ %i.next, %loop ], [ 0, %guard ]
  ; Load the two operands y[i] and z[i].
  %y.addr = getelementptr inbounds float, float* %y, i64 %i
  %y.val = load float, float* %y.addr, align 4
  %z.addr = getelementptr inbounds float, float* %z, i64 %i
  %z.val = load float, float* %z.addr, align 4
  %max = tail call float @llvm.maximum.f32(float %y.val, float %z.val) nounwind readnone
  ; Store the result to x[i].
  %x.addr = getelementptr inbounds float, float* %x, i64 %i
  store float %max, float* %x.addr, align 4
  ; Advance the 64-bit index; the exit test compares its low 32 bits with n.
  %i.next = add i64 %i, 1
  %i.next.lo = trunc i64 %i.next to i32
  %finished = icmp eq i32 %i.next.lo, %n
  br i1 %finished, label %done, label %loop

done:                                             ; preds = %loop, %guard
  ret void
}
|
||||
|
|
|
@ -5,6 +5,8 @@ declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
|
|||
|
||||
; Binary fp
|
||||
declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>)
|
||||
declare <2 x float> @llvm.minimum.v2f32(<2 x float>, <2 x float>)
|
||||
declare <2 x float> @llvm.maximum.v2f32(<2 x float>, <2 x float>)
|
||||
|
||||
; Ternary fp
|
||||
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
|
||||
|
@ -40,6 +42,28 @@ define <2 x float> @scalarize_minnum_v2f32(<2 x float> %x, <2 x float> %y) #0 {
|
|||
ret <2 x float> %minnum
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @scalarize_minimum_v2f32(
|
||||
; CHECK: %minimum.i0 = call float @llvm.minimum.f32(float %x.i0, float %y.i0)
|
||||
; CHECK: %minimum.i1 = call float @llvm.minimum.f32(float %x.i1, float %y.i1)
|
||||
; CHECK: %minimum.upto0 = insertelement <2 x float> undef, float %minimum.i0, i32 0
|
||||
; CHECK: %minimum = insertelement <2 x float> %minimum.upto0, float %minimum.i1, i32 1
|
||||
; CHECK: ret <2 x float> %minimum
|
||||
; Input for the minimum scalarization test: a single <2 x float> call to
; llvm.minimum.v2f32 on %x and %y whose result is returned directly.
; The FileCheck directives preceding this function expect one scalar
; llvm.minimum.f32 call per lane, with the results reassembled by two
; insertelement instructions. Those directives match names derived from
; %x, %y and %minimum, so these value names must not be changed.
define <2 x float> @scalarize_minimum_v2f32(<2 x float> %x, <2 x float> %y) #0 {
|
||||
%minimum = call <2 x float> @llvm.minimum.v2f32(<2 x float> %x, <2 x float> %y)
|
||||
ret <2 x float> %minimum
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @scalarize_maximum_v2f32(
|
||||
; CHECK: %maximum.i0 = call float @llvm.maximum.f32(float %x.i0, float %y.i0)
|
||||
; CHECK: %maximum.i1 = call float @llvm.maximum.f32(float %x.i1, float %y.i1)
|
||||
; CHECK: %maximum.upto0 = insertelement <2 x float> undef, float %maximum.i0, i32 0
|
||||
; CHECK: %maximum = insertelement <2 x float> %maximum.upto0, float %maximum.i1, i32 1
|
||||
; CHECK: ret <2 x float> %maximum
|
||||
; Input for the maximum scalarization test: a single <2 x float> call to
; llvm.maximum.v2f32 on %x and %y whose result is returned directly.
; The FileCheck directives preceding this function expect one scalar
; llvm.maximum.f32 call per lane, with the results reassembled by two
; insertelement instructions. Those directives match names derived from
; %x, %y and %maximum, so these value names must not be changed.
define <2 x float> @scalarize_maximum_v2f32(<2 x float> %x, <2 x float> %y) #0 {
|
||||
%maximum = call <2 x float> @llvm.maximum.v2f32(<2 x float> %x, <2 x float> %y)
|
||||
ret <2 x float> %maximum
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @scalarize_fma_v2f32(
|
||||
; CHECK: %fma.i0 = call float @llvm.fma.f32(float %x.i0, float %y.i0, float %z.i0)
|
||||
; CHECK: %fma.i1 = call float @llvm.fma.f32(float %x.i1, float %y.i1, float %z.i1)
|
||||
|
|
Loading…
Reference in New Issue