forked from OSchip/llvm-project
Handle exp2 with proper vectorization and lowering to SVML calls
Summary: Add mappings from the exp2 math functions to the corresponding SVML calls. This is a follow-up to, and an extension of, LLVM diff https://reviews.llvm.org/D19544 Test Plan: - Update the test case and run ninja check. - Run the tests locally. Reviewers: wenlei, hoyFB, mmasten, mzolotukhin, spatel Reviewed By: spatel Subscribers: llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D77114
This commit is contained in:
parent
88da019977
commit
a0983ed3d2
|
@ -245,6 +245,29 @@ TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf4", 4)
|
||||||
TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf8", 8)
|
TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf8", 8)
|
||||||
TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf16", 16)
|
TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf16", 16)
|
||||||
|
|
||||||
|
// exp2 -> SVML mappings. Each entry pairs a scalar function name with an
// __svml_* routine name and a number that matches the numeric suffix of that
// routine name (presumably the vector width -- confirm against the
// TLI_DEFINE_VECFUNC definition, which is not visible here).
// "exp2": routines without an 'f' in the name, entries for 2, 4 and 8.
TLI_DEFINE_VECFUNC("exp2", "__svml_exp22", 2)
|
||||||
|
TLI_DEFINE_VECFUNC("exp2", "__svml_exp24", 4)
|
||||||
|
TLI_DEFINE_VECFUNC("exp2", "__svml_exp28", 8)
|
||||||
|
|
||||||
|
// "exp2f": the 'f' routines, entries for 4, 8 and 16.
TLI_DEFINE_VECFUNC("exp2f", "__svml_exp2f4", 4)
|
||||||
|
TLI_DEFINE_VECFUNC("exp2f", "__svml_exp2f8", 8)
|
||||||
|
TLI_DEFINE_VECFUNC("exp2f", "__svml_exp2f16", 16)
|
||||||
|
|
||||||
|
// "llvm.exp2.f64": same routines as the "exp2" entries.
TLI_DEFINE_VECFUNC("llvm.exp2.f64", "__svml_exp22", 2)
|
||||||
|
TLI_DEFINE_VECFUNC("llvm.exp2.f64", "__svml_exp24", 4)
|
||||||
|
TLI_DEFINE_VECFUNC("llvm.exp2.f64", "__svml_exp28", 8)
|
||||||
|
|
||||||
|
// "llvm.exp2.f32": same routines as the "exp2f" entries.
TLI_DEFINE_VECFUNC("llvm.exp2.f32", "__svml_exp2f4", 4)
|
||||||
|
TLI_DEFINE_VECFUNC("llvm.exp2.f32", "__svml_exp2f8", 8)
|
||||||
|
TLI_DEFINE_VECFUNC("llvm.exp2.f32", "__svml_exp2f16", 16)
|
||||||
|
|
||||||
|
// "__exp2_finite": same routines as the "exp2" entries.
TLI_DEFINE_VECFUNC("__exp2_finite", "__svml_exp22", 2)
|
||||||
|
TLI_DEFINE_VECFUNC("__exp2_finite", "__svml_exp24", 4)
|
||||||
|
TLI_DEFINE_VECFUNC("__exp2_finite", "__svml_exp28", 8)
|
||||||
|
|
||||||
|
// "__exp2f_finite": same routines as the "exp2f" entries.
TLI_DEFINE_VECFUNC("__exp2f_finite", "__svml_exp2f4", 4)
|
||||||
|
TLI_DEFINE_VECFUNC("__exp2f_finite", "__svml_exp2f8", 8)
|
||||||
|
TLI_DEFINE_VECFUNC("__exp2f_finite", "__svml_exp2f16", 16)
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#error "Must choose which vector library functions are to be defined."
|
#error "Must choose which vector library functions are to be defined."
|
||||||
|
|
|
@ -28,6 +28,12 @@ declare float @logf(float) #0
|
||||||
declare double @llvm.log.f64(double) #0
|
declare double @llvm.log.f64(double) #0
|
||||||
declare float @llvm.log.f32(float) #0
|
declare float @llvm.log.f32(float) #0
|
||||||
|
|
||||||
|
; Scalar exp2 callees used by the exp2 test functions in this file: the
; exp2/exp2f functions, the llvm.exp2.* intrinsics and the __exp2*_finite
; variants, each in a double and a float flavour. All carry attribute group #0.
declare double @exp2(double) #0
|
||||||
|
declare float @exp2f(float) #0
|
||||||
|
declare double @llvm.exp2.f64(double) #0
|
||||||
|
declare float @llvm.exp2.f32(float) #0
|
||||||
|
declare double @__exp2_finite(double) #0
|
||||||
|
declare float @__exp2f_finite(float) #0
|
||||||
|
|
||||||
define void @sin_f64(double* nocapture %varray) {
|
define void @sin_f64(double* nocapture %varray) {
|
||||||
; CHECK-LABEL: @sin_f64(
|
; CHECK-LABEL: @sin_f64(
|
||||||
|
@ -497,5 +503,142 @@ for.end:
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { nounwind readnone }
|
; Test input for the scalar call @exp2: a loop that stores
; exp2((double)(i32)iv) into %varray[iv] for iv = 0 .. 999.
; The FileCheck directives below expect a <4 x double> call to @__svml_exp24
; in the checked output.
define void @exp2_f64(double* nocapture %varray) {
|
||||||
|
; CHECK-LABEL: @exp2_f64(
|
||||||
|
; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_exp24(<4 x double> [[TMP4:%.*]])
|
||||||
|
; CHECK: ret void
|
||||||
|
;
|
||||||
|
entry:
|
||||||
|
br label %for.body
|
||||||
|
|
||||||
|
for.body:
|
||||||
|
; Induction variable: 0 on entry, %iv.next on the back edge.
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
||||||
|
; Narrow the induction variable to i32 and convert it to double to form
; the call argument.
%tmp = trunc i64 %iv to i32
|
||||||
|
%conv = sitofp i32 %tmp to double
|
||||||
|
%call = tail call double @exp2(double %conv)
|
||||||
|
%arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
|
||||||
|
; NOTE(review): the store is marked align 4 although the element type is
; double -- confirm this is intentional.
store double %call, double* %arrayidx, align 4
|
||||||
|
%iv.next = add nuw nsw i64 %iv, 1
|
||||||
|
; Exit once %iv.next reaches 1000, i.e. after 1000 iterations.
%exitcond = icmp eq i64 %iv.next, 1000
|
||||||
|
br i1 %exitcond, label %for.end, label %for.body
|
||||||
|
|
||||||
|
for.end:
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test input for the scalar call @exp2f: a loop that stores
; exp2f((float)(i32)iv) into %varray[iv] for iv = 0 .. 999.
; The FileCheck directives below expect a <4 x float> call to @__svml_exp2f4
; in the checked output.
define void @exp2_f32(float* nocapture %varray) {
|
||||||
|
; CHECK-LABEL: @exp2_f32(
|
||||||
|
; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_exp2f4(<4 x float> [[TMP4:%.*]])
|
||||||
|
; CHECK: ret void
|
||||||
|
;
|
||||||
|
entry:
|
||||||
|
br label %for.body
|
||||||
|
|
||||||
|
for.body:
|
||||||
|
; Induction variable: 0 on entry, %iv.next on the back edge.
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
||||||
|
; Narrow the induction variable to i32 and convert it to float to form
; the call argument.
%tmp = trunc i64 %iv to i32
|
||||||
|
%conv = sitofp i32 %tmp to float
|
||||||
|
%call = tail call float @exp2f(float %conv)
|
||||||
|
%arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
|
||||||
|
store float %call, float* %arrayidx, align 4
|
||||||
|
%iv.next = add nuw nsw i64 %iv, 1
|
||||||
|
; Exit once %iv.next reaches 1000, i.e. after 1000 iterations.
%exitcond = icmp eq i64 %iv.next, 1000
|
||||||
|
br i1 %exitcond, label %for.end, label %for.body
|
||||||
|
|
||||||
|
for.end:
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test input for the intrinsic call @llvm.exp2.f64: a loop that stores
; llvm.exp2.f64((double)(i32)iv) into %varray[iv] for iv = 0 .. 999.
; The FileCheck directives below expect a <4 x double> call to @__svml_exp24
; in the checked output.
define void @exp2_f64_intrinsic(double* nocapture %varray) {
|
||||||
|
; CHECK-LABEL: @exp2_f64_intrinsic(
|
||||||
|
; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_exp24(<4 x double> [[TMP4:%.*]])
|
||||||
|
; CHECK: ret void
|
||||||
|
;
|
||||||
|
entry:
|
||||||
|
br label %for.body
|
||||||
|
|
||||||
|
for.body:
|
||||||
|
; Induction variable: 0 on entry, %iv.next on the back edge.
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
||||||
|
; Narrow the induction variable to i32 and convert it to double to form
; the call argument.
%tmp = trunc i64 %iv to i32
|
||||||
|
%conv = sitofp i32 %tmp to double
|
||||||
|
%call = tail call double @llvm.exp2.f64(double %conv)
|
||||||
|
%arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
|
||||||
|
; NOTE(review): the store is marked align 4 although the element type is
; double -- confirm this is intentional.
store double %call, double* %arrayidx, align 4
|
||||||
|
%iv.next = add nuw nsw i64 %iv, 1
|
||||||
|
; Exit once %iv.next reaches 1000, i.e. after 1000 iterations.
%exitcond = icmp eq i64 %iv.next, 1000
|
||||||
|
br i1 %exitcond, label %for.end, label %for.body
|
||||||
|
|
||||||
|
for.end:
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test input for the intrinsic call @llvm.exp2.f32: a loop that stores
; llvm.exp2.f32((float)(i32)iv) into %varray[iv] for iv = 0 .. 999.
; The FileCheck directives below expect a <4 x float> call to @__svml_exp2f4
; in the checked output.
define void @exp2_f32_intrinsic(float* nocapture %varray) {
|
||||||
|
; CHECK-LABEL: @exp2_f32_intrinsic(
|
||||||
|
; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_exp2f4(<4 x float> [[TMP4:%.*]])
|
||||||
|
; CHECK: ret void
|
||||||
|
;
|
||||||
|
entry:
|
||||||
|
br label %for.body
|
||||||
|
|
||||||
|
for.body:
|
||||||
|
; Induction variable: 0 on entry, %iv.next on the back edge.
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
||||||
|
; Narrow the induction variable to i32 and convert it to float to form
; the call argument.
%tmp = trunc i64 %iv to i32
|
||||||
|
%conv = sitofp i32 %tmp to float
|
||||||
|
%call = tail call float @llvm.exp2.f32(float %conv)
|
||||||
|
%arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
|
||||||
|
store float %call, float* %arrayidx, align 4
|
||||||
|
%iv.next = add nuw nsw i64 %iv, 1
|
||||||
|
; Exit once %iv.next reaches 1000, i.e. after 1000 iterations.
%exitcond = icmp eq i64 %iv.next, 1000
|
||||||
|
br i1 %exitcond, label %for.end, label %for.body
|
||||||
|
|
||||||
|
for.end:
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test input for the scalar call @__exp2f_finite: a loop that stores
; __exp2f_finite((float)(i32)iv) into %varray[iv] for iv = 0 .. 999.
; The FileCheck directives below expect a <4 x float> call to @__svml_exp2f4
; in the checked output.
define void @exp2f_finite(float* nocapture %varray) {
|
||||||
|
; CHECK-LABEL: @exp2f_finite(
|
||||||
|
; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_exp2f4(<4 x float> [[TMP4:%.*]])
|
||||||
|
; CHECK: ret void
|
||||||
|
;
|
||||||
|
entry:
|
||||||
|
br label %for.body
|
||||||
|
|
||||||
|
for.body:
|
||||||
|
; Induction variable: 0 on entry, %iv.next on the back edge.
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
||||||
|
; Narrow the induction variable to i32 and convert it to float to form
; the call argument.
%tmp = trunc i64 %iv to i32
|
||||||
|
%conv = sitofp i32 %tmp to float
|
||||||
|
%call = tail call float @__exp2f_finite(float %conv)
|
||||||
|
%arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
|
||||||
|
store float %call, float* %arrayidx, align 4
|
||||||
|
%iv.next = add nuw nsw i64 %iv, 1
|
||||||
|
; Exit once %iv.next reaches 1000, i.e. after 1000 iterations.
%exitcond = icmp eq i64 %iv.next, 1000
|
||||||
|
br i1 %exitcond, label %for.end, label %for.body
|
||||||
|
|
||||||
|
for.end:
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test input for the scalar call @__exp2_finite: a loop that stores
; __exp2_finite((double)(i32)iv) into %varray[iv] for iv = 0 .. 999.
; The FileCheck directives below expect a <4 x double> call to @__svml_exp24
; in the checked output.
define void @exp2_finite(double* nocapture %varray) {
|
||||||
|
; CHECK-LABEL: @exp2_finite(
|
||||||
|
; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_exp24(<4 x double> [[TMP4:%.*]])
|
||||||
|
; CHECK: ret void
|
||||||
|
;
|
||||||
|
entry:
|
||||||
|
br label %for.body
|
||||||
|
|
||||||
|
for.body:
|
||||||
|
; Induction variable: 0 on entry, %iv.next on the back edge.
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
||||||
|
; Narrow the induction variable to i32 and convert it to double to form
; the call argument.
%tmp = trunc i64 %iv to i32
|
||||||
|
%conv = sitofp i32 %tmp to double
|
||||||
|
%call = tail call double @__exp2_finite(double %conv)
|
||||||
|
%arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
|
||||||
|
; NOTE(review): the store is marked align 4 although the element type is
; double -- confirm this is intentional.
store double %call, double* %arrayidx, align 4
|
||||||
|
%iv.next = add nuw nsw i64 %iv, 1
|
||||||
|
; Exit once %iv.next reaches 1000, i.e. after 1000 iterations.
%exitcond = icmp eq i64 %iv.next, 1000
|
||||||
|
br i1 %exitcond, label %for.end, label %for.body
|
||||||
|
|
||||||
|
for.end:
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
attributes #0 = { nounwind readnone }
|
||||||
|
|
Loading…
Reference in New Issue