forked from OSchip/llvm-project
SVML support for log2
Although LLVM supports vectorization of loops containing log2, it did not support using the SVML implementation of it. This change adds support so that when clang is invoked with -fveclib=SVML, an appropriate SVML library log2 implementation is called. Follow up on: https://reviews.llvm.org/D77114 Tests: Added unit tests to svml-calls.ll and svml-calls-finite.ll; they can be run with llvm-lit. Also created a simple C++ file that tests log2, built it with clang++, and emitted the final assembly. Reviewed By: wenlei, craig.topper Differential Revision: https://reviews.llvm.org/D86730
This commit is contained in:
parent
b2e65cf950
commit
d1be928d23
|
@ -245,6 +245,30 @@ TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf4", 4)
|
|||
TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf8", 8)
|
||||
TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf16", 16)
|
||||
|
||||
// SVML mappings for log2. Each entry is (scalar name, SVML vector name,
// vectorization factor). Double-precision names map to __svml_log2{2,4,8};
// single-precision names map to __svml_log2f{4,8,16}. The plain libm names,
// the __log2*_finite names and the llvm.log2.* intrinsics all share the same
// SVML entry points.

// log2 (double), libm name.
TLI_DEFINE_VECFUNC("log2", "__svml_log22", 2)
|
||||
TLI_DEFINE_VECFUNC("log2", "__svml_log24", 4)
|
||||
TLI_DEFINE_VECFUNC("log2", "__svml_log28", 8)
|
||||
|
||||
// log2f (float), libm name.
TLI_DEFINE_VECFUNC("log2f", "__svml_log2f4", 4)
|
||||
TLI_DEFINE_VECFUNC("log2f", "__svml_log2f8", 8)
|
||||
TLI_DEFINE_VECFUNC("log2f", "__svml_log2f16", 16)
|
||||
|
||||
// __log2_finite (double).
TLI_DEFINE_VECFUNC("__log2_finite", "__svml_log22", 2)
|
||||
TLI_DEFINE_VECFUNC("__log2_finite", "__svml_log24", 4)
|
||||
TLI_DEFINE_VECFUNC("__log2_finite", "__svml_log28", 8)
|
||||
|
||||
// __log2f_finite (float).
TLI_DEFINE_VECFUNC("__log2f_finite", "__svml_log2f4", 4)
|
||||
TLI_DEFINE_VECFUNC("__log2f_finite", "__svml_log2f8", 8)
|
||||
TLI_DEFINE_VECFUNC("__log2f_finite", "__svml_log2f16", 16)
|
||||
|
||||
// llvm.log2.f64 intrinsic.
TLI_DEFINE_VECFUNC("llvm.log2.f64", "__svml_log22", 2)
|
||||
TLI_DEFINE_VECFUNC("llvm.log2.f64", "__svml_log24", 4)
|
||||
TLI_DEFINE_VECFUNC("llvm.log2.f64", "__svml_log28", 8)
|
||||
|
||||
// llvm.log2.f32 intrinsic.
TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f4", 4)
|
||||
TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f8", 8)
|
||||
TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f16", 16)
|
||||
|
||||
TLI_DEFINE_VECFUNC("exp2", "__svml_exp22", 2)
|
||||
TLI_DEFINE_VECFUNC("exp2", "__svml_exp24", 4)
|
||||
TLI_DEFINE_VECFUNC("exp2", "__svml_exp28", 8)
|
||||
|
|
|
@ -243,3 +243,60 @@ for.end:
|
|||
!71 = distinct !{!71, !72, !73}
|
||||
!72 = !{!"llvm.loop.vectorize.width", i32 4}
|
||||
!73 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
declare float @__log2f_finite(float) #0

; A 1000-iteration loop that stores __log2f_finite(float(i)) into %varray[i].
; The loop metadata below requests vectorization at width 4, and the test
; expects the scalar call to be replaced by the 4-wide SVML routine.
; CHECK-LABEL: @log2_f32
; CHECK: <4 x float> @__svml_log2f4
; CHECK: ret
define void @log2_f32(float* nocapture %varray) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %tmp = trunc i64 %indvars.iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call fast float @__log2f_finite(float %conv)
  %arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv
  store float %call, float* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1000
  ; Use this function's own loop ID (!81) rather than one defined for an
  ; unrelated loop.
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !81

for.end:                                          ; preds = %for.body
  ret void
}

; Loop ID for @log2_f32: the first operand is the self-reference, followed by
; the vectorization hints.
!81 = distinct !{!81, !82, !83}
!82 = !{!"llvm.loop.vectorize.width", i32 4}
!83 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
|
||||
declare double @__log2_finite(double) #0

; A 1000-iteration loop that stores __log2_finite(double(i)) into %varray[i].
; The loop metadata below requests vectorization at width 4, and the test
; expects the scalar call to be replaced by the 4-wide SVML routine.
; CHECK-LABEL: @log2_f64
; CHECK: <4 x double> @__svml_log24
; CHECK: ret
define void @log2_f64(double* nocapture %varray) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %tmp = trunc i64 %indvars.iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call fast double @__log2_finite(double %conv)
  %arrayidx = getelementptr inbounds double, double* %varray, i64 %indvars.iv
  store double %call, double* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1000
  ; Use this function's own loop ID (!91) rather than one defined for an
  ; unrelated loop.
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !91

for.end:                                          ; preds = %for.body
  ret void
}

; Loop ID for @log2_f64: the first operand is the self-reference, followed by
; the vectorization hints.
!91 = distinct !{!91, !92, !93}
!92 = !{!"llvm.loop.vectorize.width", i32 4}
!93 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
|
|
@ -28,6 +28,11 @@ declare float @logf(float) #0
|
|||
declare double @llvm.log.f64(double) #0
|
||||
declare float @llvm.log.f32(float) #0
|
||||
|
||||
; Scalar log2 callees used by the log2 tests in this file: the libm-style
; functions and the LLVM intrinsics, each in f64 and f32 form.
declare double @log2(double) #0
|
||||
declare float @log2f(float) #0
|
||||
declare double @llvm.log2.f64(double) #0
|
||||
declare float @llvm.log2.f32(float) #0
|
||||
|
||||
declare double @exp2(double) #0
|
||||
declare float @exp2f(float) #0
|
||||
declare double @llvm.exp2.f64(double) #0
|
||||
|
@ -501,6 +506,98 @@ for.end:
|
|||
ret void
|
||||
}
|
||||
|
||||
; Stores log2(double(i)) into %varray[i] for i in [0, 1000). The test expects
; the scalar libm call to become a <4 x double> call to __svml_log24.
define void @log2_f64(double* nocapture %varray) {
; CHECK-LABEL: @log2_f64(
; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log24(<4 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %loop

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %i.narrow = trunc i64 %i to i32
  %x = sitofp i32 %i.narrow to double
  %y = tail call double @log2(double %x)
  %slot = getelementptr inbounds double, double* %varray, i64 %i
  store double %y, double* %slot, align 4
  %i.next = add nuw nsw i64 %i, 1
  %done = icmp eq i64 %i.next, 1000
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
|
||||
|
||||
; Stores log2f(float(i)) into %varray[i] for i in [0, 1000). The test expects
; the scalar libm call to become a <4 x float> call to __svml_log2f4.
define void @log2_f32(float* nocapture %varray) {
; CHECK-LABEL: @log2_f32(
; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_log2f4(<4 x float> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %loop

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %i.narrow = trunc i64 %i to i32
  %x = sitofp i32 %i.narrow to float
  %y = tail call float @log2f(float %x)
  %slot = getelementptr inbounds float, float* %varray, i64 %i
  store float %y, float* %slot, align 4
  %i.next = add nuw nsw i64 %i, 1
  %done = icmp eq i64 %i.next, 1000
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
|
||||
|
||||
; Stores llvm.log2.f64(double(i)) into %varray[i] for i in [0, 1000). The test
; expects the intrinsic call to become a <4 x double> call to __svml_log24.
define void @log2_f64_intrinsic(double* nocapture %varray) {
; CHECK-LABEL: @log2_f64_intrinsic(
; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log24(<4 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %loop

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %i.narrow = trunc i64 %i to i32
  %x = sitofp i32 %i.narrow to double
  %y = tail call double @llvm.log2.f64(double %x)
  %slot = getelementptr inbounds double, double* %varray, i64 %i
  store double %y, double* %slot, align 4
  %i.next = add nuw nsw i64 %i, 1
  %done = icmp eq i64 %i.next, 1000
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
|
||||
|
||||
; Stores llvm.log2.f32(float(i)) into %varray[i] for i in [0, 1000). The test
; expects the intrinsic call to become a <4 x float> call to __svml_log2f4.
define void @log2_f32_intrinsic(float* nocapture %varray) {
; CHECK-LABEL: @log2_f32_intrinsic(
; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_log2f4(<4 x float> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %loop

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %i.narrow = trunc i64 %i to i32
  %x = sitofp i32 %i.narrow to float
  %y = tail call float @llvm.log2.f32(float %x)
  %slot = getelementptr inbounds float, float* %varray, i64 %i
  store float %y, float* %slot, align 4
  %i.next = add nuw nsw i64 %i, 1
  %done = icmp eq i64 %i.next, 1000
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
|
||||
|
||||
define void @exp2_f64(double* nocapture %varray) {
|
||||
; CHECK-LABEL: @exp2_f64(
|
||||
; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_exp24(<4 x double> [[TMP4:%.*]])
|
||||
|
|
Loading…
Reference in New Issue