forked from OSchip/llvm-project
Initial support for IBM MASS vector library
This is the LLVM portion of patch https://reviews.llvm.org/D59881. The clang portion is to follow. llvm-svn: 362568
This commit is contained in:
parent
2e05045e26
commit
fe97754acf
|
@ -86,6 +86,7 @@ public:
|
|||
enum VectorLibrary {
|
||||
NoLibrary, // Don't use any vector library.
|
||||
Accelerate, // Use Accelerate framework.
|
||||
MASSV, // IBM MASS vector library.
|
||||
SVML // Intel short vector math library.
|
||||
};
|
||||
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
|
||||
// This .def file will create mappings from scalar math functions to vector
|
||||
// functions along with their vectorization factor. The current support includes
|
||||
// such mappings for Accelerate framework and SVML library.
|
||||
// such mappings for Accelerate framework, MASS vector library, and SVML library.
|
||||
|
||||
#if !(defined(TLI_DEFINE_VECFUNC))
|
||||
#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF) {SCAL, VEC, VF},
|
||||
|
@ -55,6 +55,82 @@ TLI_DEFINE_VECFUNC("acoshf", "vacoshf", 4)
|
|||
TLI_DEFINE_VECFUNC("atanhf", "vatanhf", 4)
|
||||
|
||||
|
||||
#elif defined(TLI_DEFINE_MASSV_VECFUNCS)
|
||||
// IBM MASS library's Vector Functions
|
||||
|
||||
// Floating-Point Arithmetic and Auxiliary Functions
|
||||
TLI_DEFINE_VECFUNC("cbrt", "__cbrtd2_massv", 2)
|
||||
TLI_DEFINE_VECFUNC("cbrtf", "__cbrtf4_massv", 4)
|
||||
TLI_DEFINE_VECFUNC("pow", "__powd2_massv", 2)
|
||||
TLI_DEFINE_VECFUNC("llvm.pow.f64", "__powd2_massv", 2)
|
||||
TLI_DEFINE_VECFUNC("powf", "__powf4_massv", 4)
|
||||
TLI_DEFINE_VECFUNC("llvm.pow.f32", "__powf4_massv", 4)
|
||||
TLI_DEFINE_VECFUNC("sqrt", "__sqrtd2_massv", 2)
|
||||
TLI_DEFINE_VECFUNC("llvm.sqrt.f64", "__sqrtd2_massv", 2)
|
||||
TLI_DEFINE_VECFUNC("sqrtf", "__sqrtf4_massv", 4)
|
||||
TLI_DEFINE_VECFUNC("llvm.sqrt.f32", "__sqrtf4_massv", 4)
|
||||
|
||||
// Exponential and Logarithmic Functions
|
||||
TLI_DEFINE_VECFUNC("exp", "__expd2_massv", 2)
|
||||
TLI_DEFINE_VECFUNC("llvm.exp.f64", "__expd2_massv", 2)
|
||||
TLI_DEFINE_VECFUNC("expf", "__expf4_massv", 4)
|
||||
TLI_DEFINE_VECFUNC("llvm.exp.f32", "__expf4_massv", 4)
|
||||
TLI_DEFINE_VECFUNC("exp2", "__exp2d2_massv", 2)
|
||||
TLI_DEFINE_VECFUNC("llvm.exp2.f64", "__exp2d2_massv", 2)
|
||||
TLI_DEFINE_VECFUNC("exp2f", "__exp2f4_massv", 4)
|
||||
TLI_DEFINE_VECFUNC("llvm.exp2.f32", "__exp2f4_massv", 4)
|
||||
TLI_DEFINE_VECFUNC("expm1", "__expm1d2_massv", 2)
|
||||
TLI_DEFINE_VECFUNC("expm1f", "__expm1f4_massv", 4)
|
||||
TLI_DEFINE_VECFUNC("log", "__logd2_massv", 2)
|
||||
TLI_DEFINE_VECFUNC("llvm.log.f64", "__logd2_massv", 2)
|
||||
TLI_DEFINE_VECFUNC("logf", "__logf4_massv", 4)
|
||||
TLI_DEFINE_VECFUNC("llvm.log.f32", "__logf4_massv", 4)
|
||||
TLI_DEFINE_VECFUNC("log1p", "__log1pd2_massv", 2)
|
||||
TLI_DEFINE_VECFUNC("log1pf", "__log1pf4_massv", 4)
|
||||
TLI_DEFINE_VECFUNC("log10", "__log10d2_massv", 2)
|
||||
TLI_DEFINE_VECFUNC("llvm.log10.f64", "__log10d2_massv", 2)
|
||||
TLI_DEFINE_VECFUNC("log10f", "__log10f4_massv", 4)
|
||||
TLI_DEFINE_VECFUNC("llvm.log10.f32", "__log10f4_massv", 4)
|
||||
TLI_DEFINE_VECFUNC("log2", "__log2d2_massv", 2)
|
||||
TLI_DEFINE_VECFUNC("llvm.log2.f64", "__log2d2_massv", 2)
|
||||
TLI_DEFINE_VECFUNC("log2f", "__log2f4_massv", 4)
|
||||
TLI_DEFINE_VECFUNC("llvm.log2.f32", "__log2f4_massv", 4)
|
||||
|
||||
// Trigonometric Functions
|
||||
TLI_DEFINE_VECFUNC("sin", "__sind2_massv", 2)
|
||||
TLI_DEFINE_VECFUNC("llvm.sin.f64", "__sind2_massv", 2)
|
||||
TLI_DEFINE_VECFUNC("sinf", "__sinf4_massv", 4)
|
||||
TLI_DEFINE_VECFUNC("llvm.sin.f32", "__sinf4_massv", 4)
|
||||
TLI_DEFINE_VECFUNC("cos", "__cosd2_massv", 2)
|
||||
TLI_DEFINE_VECFUNC("llvm.cos.f64", "__cosd2_massv", 2)
|
||||
TLI_DEFINE_VECFUNC("cosf", "__cosf4_massv", 4)
|
||||
TLI_DEFINE_VECFUNC("llvm.cos.f32", "__cosf4_massv", 4)
|
||||
TLI_DEFINE_VECFUNC("tan", "__tand2_massv", 2)
|
||||
TLI_DEFINE_VECFUNC("tanf", "__tanf4_massv", 4)
|
||||
TLI_DEFINE_VECFUNC("asin", "__asind2_massv", 2)
|
||||
TLI_DEFINE_VECFUNC("asinf", "__asinf4_massv", 4)
|
||||
TLI_DEFINE_VECFUNC("acos", "__acosd2_massv", 2)
|
||||
TLI_DEFINE_VECFUNC("acosf", "__acosf4_massv", 4)
|
||||
TLI_DEFINE_VECFUNC("atan", "__atand2_massv", 2)
|
||||
TLI_DEFINE_VECFUNC("atanf", "__atanf4_massv", 4)
|
||||
TLI_DEFINE_VECFUNC("atan2", "__atan2d2_massv", 2)
|
||||
TLI_DEFINE_VECFUNC("atan2f", "__atan2f4_massv", 4)
|
||||
|
||||
// Hyperbolic Functions
|
||||
TLI_DEFINE_VECFUNC("sinh", "__sinhd2_massv", 2)
|
||||
TLI_DEFINE_VECFUNC("sinhf", "__sinhf4_massv", 4)
|
||||
TLI_DEFINE_VECFUNC("cosh", "__coshd2_massv", 2)
|
||||
TLI_DEFINE_VECFUNC("coshf", "__coshf4_massv", 4)
|
||||
TLI_DEFINE_VECFUNC("tanh", "__tanhd2_massv", 2)
|
||||
TLI_DEFINE_VECFUNC("tanhf", "__tanhf4_massv", 4)
|
||||
TLI_DEFINE_VECFUNC("asinh", "__asinhd2_massv", 2)
|
||||
TLI_DEFINE_VECFUNC("asinhf", "__asinhf4_massv", 4)
|
||||
TLI_DEFINE_VECFUNC("acosh", "__acoshd2_massv", 2)
|
||||
TLI_DEFINE_VECFUNC("acoshf", "__acoshf4_massv", 4)
|
||||
TLI_DEFINE_VECFUNC("atanh", "__atanhd2_massv", 2)
|
||||
TLI_DEFINE_VECFUNC("atanhf", "__atanhf4_massv", 4)
|
||||
|
||||
|
||||
#elif defined(TLI_DEFINE_SVML_VECFUNCS)
|
||||
// Intel SVM library's Vector Functions
|
||||
|
||||
|
@ -169,5 +245,6 @@ TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf16", 16)
|
|||
|
||||
#undef TLI_DEFINE_VECFUNC
|
||||
#undef TLI_DEFINE_ACCELERATE_VECFUNCS
|
||||
#undef TLI_DEFINE_MASSV_VECFUNCS
|
||||
#undef TLI_DEFINE_SVML_VECFUNCS
|
||||
|
||||
|
|
|
@ -23,6 +23,8 @@ static cl::opt<TargetLibraryInfoImpl::VectorLibrary> ClVectorLibrary(
|
|||
"No vector functions library"),
|
||||
clEnumValN(TargetLibraryInfoImpl::Accelerate, "Accelerate",
|
||||
"Accelerate framework"),
|
||||
clEnumValN(TargetLibraryInfoImpl::MASSV, "MASSV",
|
||||
"IBM MASS vector library"),
|
||||
clEnumValN(TargetLibraryInfoImpl::SVML, "SVML",
|
||||
"Intel SVML library")));
|
||||
|
||||
|
@ -1537,6 +1539,14 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
|
|||
addVectorizableFunctions(VecFuncs);
|
||||
break;
|
||||
}
|
||||
case MASSV: {
|
||||
const VecDesc VecFuncs[] = {
|
||||
#define TLI_DEFINE_MASSV_VECFUNCS
|
||||
#include "llvm/Analysis/VecFuncs.def"
|
||||
};
|
||||
addVectorizableFunctions(VecFuncs);
|
||||
break;
|
||||
}
|
||||
case SVML: {
|
||||
const VecDesc VecFuncs[] = {
|
||||
#define TLI_DEFINE_SVML_VECFUNCS
|
||||
|
|
|
@ -0,0 +1,106 @@
|
|||
; RUN: opt -vector-library=MASSV -loop-vectorize -force-vector-interleave=1 -mattr=-altivec -S < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64le-unknown-linux-gnu"
|
||||
|
||||
declare double @cbrt(double) #0
|
||||
declare float @cbrtf(float) #0
|
||||
|
||||
declare double @atanh(double) #0
|
||||
declare float @atanhf(float) #0
|
||||
|
||||
; MASSV entries must not be used when AltiVec is disabled (-mattr=-altivec).
|
||||
; Check that massv entries are not generated.
|
||||
define void @cbrt_f64(double* nocapture %varray) {
|
||||
; CHECK-LABEL: @cbrt_f64(
|
||||
; CHECK-NOT: __cbrtd2_massv{{.*}}<2 x double>
|
||||
; CHECK: ret void
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
||||
%tmp = trunc i64 %iv to i32
|
||||
%conv = sitofp i32 %tmp to double
|
||||
%call = tail call double @cbrt(double %conv)
|
||||
%arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
|
||||
store double %call, double* %arrayidx, align 4
|
||||
%iv.next = add nuw nsw i64 %iv, 1
|
||||
%exitcond = icmp eq i64 %iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @cbrt_f32(float* nocapture %varray) {
|
||||
; CHECK-LABEL: @cbrt_f32(
|
||||
; CHECK-NOT: __cbrtf4_massv{{.*}}<4 x float>
|
||||
; CHECK: ret void
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
||||
%tmp = trunc i64 %iv to i32
|
||||
%conv = sitofp i32 %tmp to float
|
||||
%call = tail call float @cbrtf(float %conv)
|
||||
%arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
|
||||
store float %call, float* %arrayidx, align 4
|
||||
%iv.next = add nuw nsw i64 %iv, 1
|
||||
%exitcond = icmp eq i64 %iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @atanh_f64(double* nocapture %varray) {
|
||||
; CHECK-LABEL: @atanh_f64(
|
||||
; CHECK-NOT: __atanhd2_massv{{.*}}<2 x double>
|
||||
; CHECK: ret void
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
||||
%tmp = trunc i64 %iv to i32
|
||||
%conv = sitofp i32 %tmp to double
|
||||
%call = tail call double @atanh(double %conv)
|
||||
%arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
|
||||
store double %call, double* %arrayidx, align 4
|
||||
%iv.next = add nuw nsw i64 %iv, 1
|
||||
%exitcond = icmp eq i64 %iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @atanh_f32(float* nocapture %varray) {
|
||||
; CHECK-LABEL: @atanh_f32(
|
||||
; CHECK-NOT: __atanhf4_massv{{.*}}<4 x float>
|
||||
; CHECK: ret void
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
||||
%tmp = trunc i64 %iv to i32
|
||||
%conv = sitofp i32 %tmp to float
|
||||
%call = tail call float @atanhf(float %conv)
|
||||
%arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
|
||||
store float %call, float* %arrayidx, align 4
|
||||
%iv.next = add nuw nsw i64 %iv, 1
|
||||
%exitcond = icmp eq i64 %iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,56 @@
|
|||
; RUN: opt -vector-library=MASSV -loop-vectorize -force-vector-interleave=1 -S < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64le-unknown-linux-gnu"
|
||||
|
||||
declare double @atanh(double) #1
|
||||
declare float @atanhf(float) #1
|
||||
|
||||
; Check that functions marked as nobuiltin are not lowered to massv entries.
|
||||
define void @atanh_f64(double* nocapture %varray) {
|
||||
; CHECK-LABEL: @atanh_f64(
|
||||
; CHECK-NOT: __atanhd2_massv{{.*}}<2 x double>
|
||||
; CHECK: ret void
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
||||
%tmp = trunc i64 %iv to i32
|
||||
%conv = sitofp i32 %tmp to double
|
||||
%call = tail call double @atanh(double %conv)
|
||||
%arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
|
||||
store double %call, double* %arrayidx, align 4
|
||||
%iv.next = add nuw nsw i64 %iv, 1
|
||||
%exitcond = icmp eq i64 %iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @atanh_f32(float* nocapture %varray) {
|
||||
; CHECK-LABEL: @atanh_f32(
|
||||
; CHECK-NOT: __atanhf4_massv{{.*}}<4 x float>
|
||||
; CHECK: ret void
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
||||
%tmp = trunc i64 %iv to i32
|
||||
%conv = sitofp i32 %tmp to float
|
||||
%call = tail call float @atanhf(float %conv)
|
||||
%arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
|
||||
store float %call, float* %arrayidx, align 4
|
||||
%iv.next = add nuw nsw i64 %iv, 1
|
||||
%exitcond = icmp eq i64 %iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #1 = { nobuiltin nounwind }
|
|
@ -0,0 +1,108 @@
|
|||
; RUN: opt -vector-library=MASSV -loop-vectorize -force-vector-interleave=1 -S < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64le-unknown-linux-gnu"
|
||||
|
||||
declare double @ceil(double) #0
|
||||
declare float @fabsf(float) #0
|
||||
|
||||
declare double @llvm.sqrt.f64(double) #0
|
||||
declare float @llvm.sqrt.f32(float) #0
|
||||
|
||||
; Vector counterpart of ceil is unsupported in MASSV library.
|
||||
define void @ceil_f64(double* nocapture %varray) {
|
||||
; CHECK-LABEL: @ceil_f64(
|
||||
; CHECK-NOT: __ceild2_massv{{.*}}<2 x double>
|
||||
; CHECK: ret void
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
||||
%tmp = trunc i64 %iv to i32
|
||||
%conv = sitofp i32 %tmp to double
|
||||
%call = tail call double @ceil(double %conv)
|
||||
%arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
|
||||
store double %call, double* %arrayidx, align 4
|
||||
%iv.next = add nuw nsw i64 %iv, 1
|
||||
%exitcond = icmp eq i64 %iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Vector counterpart of fabs is unsupported in MASSV library.
|
||||
define void @fabs_f32(float* nocapture %varray) {
|
||||
; CHECK-LABEL: @fabs_f32(
|
||||
; CHECK-NOT: __fabsf4_massv{{.*}}<4 x float>
|
||||
; CHECK: ret void
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
||||
%tmp = trunc i64 %iv to i32
|
||||
%conv = sitofp i32 %tmp to float
|
||||
%call = tail call float @fabsf(float %conv)
|
||||
%arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
|
||||
store float %call, float* %arrayidx, align 4
|
||||
%iv.next = add nuw nsw i64 %iv, 1
|
||||
%exitcond = icmp eq i64 %iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
; sqrt intrinsics are converted to their vector counterpart intrinsics.
|
||||
; They are not lowered to MASSV entries.
|
||||
define void @sqrt_f64_intrinsic(double* nocapture %varray) {
|
||||
; CHECK-LABEL: @sqrt_f64_intrinsic(
|
||||
; CHECK: llvm.sqrt.v2f64{{.*}}<2 x double>
|
||||
; CHECK: ret void
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
||||
%tmp = trunc i64 %iv to i32
|
||||
%conv = sitofp i32 %tmp to double
|
||||
%call = tail call double @llvm.sqrt.f64(double %conv)
|
||||
%arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
|
||||
store double %call, double* %arrayidx, align 4
|
||||
%iv.next = add nuw nsw i64 %iv, 1
|
||||
%exitcond = icmp eq i64 %iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @sqrt_f32_intrinsic(float* nocapture %varray) {
|
||||
; CHECK-LABEL: @sqrt_f32_intrinsic(
|
||||
; CHECK: llvm.sqrt.v4f32{{.*}}<4 x float>
|
||||
; CHECK: ret void
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
||||
%tmp = trunc i64 %iv to i32
|
||||
%conv = sitofp i32 %tmp to float
|
||||
%call = tail call float @llvm.sqrt.f32(float %conv)
|
||||
%arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
|
||||
store float %call, float* %arrayidx, align 4
|
||||
%iv.next = add nuw nsw i64 %iv, 1
|
||||
%exitcond = icmp eq i64 %iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
Loading…
Reference in New Issue