Initial support for IBM MASS vector library

This is the LLVM portion of patch https://reviews.llvm.org/D59881.
The clang portion is to follow.

llvm-svn: 362568
This commit is contained in:
Nemanja Ivanovic 2019-06-05 01:31:43 +00:00
parent 2e05045e26
commit fe97754acf
7 changed files with 1884 additions and 1 deletions

View File

@ -86,6 +86,7 @@ public:
enum VectorLibrary {
NoLibrary, // Don't use any vector library.
Accelerate, // Use the Accelerate framework.
MASSV, // Use the IBM MASS vector library.
SVML // Use the Intel short vector math library (SVML).
};

View File

@ -8,7 +8,7 @@
// This .def file will create mappings from scalar math functions to vector
// functions along with their vectorization factor. The current support includes
// such mappings for Accelerate framework, MASS vector library, and SVML library.
#if !(defined(TLI_DEFINE_VECFUNC))
#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF) {SCAL, VEC, VF},
@ -55,6 +55,82 @@ TLI_DEFINE_VECFUNC("acoshf", "vacoshf", 4)
TLI_DEFINE_VECFUNC("atanhf", "vatanhf", 4)
#elif defined(TLI_DEFINE_MASSV_VECFUNCS)
// IBM MASS library's vector functions.
//
// Each entry is (scalar name, MASSV entry point, vectorization factor).
// Entries whose vector name ends in "d2_massv" take double arguments and use
// a vectorization factor of 2; entries ending in "f4_massv" take float
// arguments and use a vectorization factor of 4. Where an llvm.* intrinsic
// name is listed, it maps to the same entry point as the libm name above it.

// Floating-Point Arithmetic and Auxiliary Functions
TLI_DEFINE_VECFUNC("cbrt", "__cbrtd2_massv", 2)
TLI_DEFINE_VECFUNC("cbrtf", "__cbrtf4_massv", 4)
TLI_DEFINE_VECFUNC("pow", "__powd2_massv", 2)
TLI_DEFINE_VECFUNC("llvm.pow.f64", "__powd2_massv", 2)
TLI_DEFINE_VECFUNC("powf", "__powf4_massv", 4)
TLI_DEFINE_VECFUNC("llvm.pow.f32", "__powf4_massv", 4)
TLI_DEFINE_VECFUNC("sqrt", "__sqrtd2_massv", 2)
TLI_DEFINE_VECFUNC("llvm.sqrt.f64", "__sqrtd2_massv", 2)
TLI_DEFINE_VECFUNC("sqrtf", "__sqrtf4_massv", 4)
TLI_DEFINE_VECFUNC("llvm.sqrt.f32", "__sqrtf4_massv", 4)
// Exponential and Logarithmic Functions
TLI_DEFINE_VECFUNC("exp", "__expd2_massv", 2)
TLI_DEFINE_VECFUNC("llvm.exp.f64", "__expd2_massv", 2)
TLI_DEFINE_VECFUNC("expf", "__expf4_massv", 4)
TLI_DEFINE_VECFUNC("llvm.exp.f32", "__expf4_massv", 4)
TLI_DEFINE_VECFUNC("exp2", "__exp2d2_massv", 2)
TLI_DEFINE_VECFUNC("llvm.exp2.f64", "__exp2d2_massv", 2)
TLI_DEFINE_VECFUNC("exp2f", "__exp2f4_massv", 4)
TLI_DEFINE_VECFUNC("llvm.exp2.f32", "__exp2f4_massv", 4)
TLI_DEFINE_VECFUNC("expm1", "__expm1d2_massv", 2)
TLI_DEFINE_VECFUNC("expm1f", "__expm1f4_massv", 4)
TLI_DEFINE_VECFUNC("log", "__logd2_massv", 2)
TLI_DEFINE_VECFUNC("llvm.log.f64", "__logd2_massv", 2)
TLI_DEFINE_VECFUNC("logf", "__logf4_massv", 4)
TLI_DEFINE_VECFUNC("llvm.log.f32", "__logf4_massv", 4)
TLI_DEFINE_VECFUNC("log1p", "__log1pd2_massv", 2)
TLI_DEFINE_VECFUNC("log1pf", "__log1pf4_massv", 4)
TLI_DEFINE_VECFUNC("log10", "__log10d2_massv", 2)
TLI_DEFINE_VECFUNC("llvm.log10.f64", "__log10d2_massv", 2)
TLI_DEFINE_VECFUNC("log10f", "__log10f4_massv", 4)
TLI_DEFINE_VECFUNC("llvm.log10.f32", "__log10f4_massv", 4)
TLI_DEFINE_VECFUNC("log2", "__log2d2_massv", 2)
TLI_DEFINE_VECFUNC("llvm.log2.f64", "__log2d2_massv", 2)
TLI_DEFINE_VECFUNC("log2f", "__log2f4_massv", 4)
TLI_DEFINE_VECFUNC("llvm.log2.f32", "__log2f4_massv", 4)
// Trigonometric Functions
TLI_DEFINE_VECFUNC("sin", "__sind2_massv", 2)
TLI_DEFINE_VECFUNC("llvm.sin.f64", "__sind2_massv", 2)
TLI_DEFINE_VECFUNC("sinf", "__sinf4_massv", 4)
TLI_DEFINE_VECFUNC("llvm.sin.f32", "__sinf4_massv", 4)
TLI_DEFINE_VECFUNC("cos", "__cosd2_massv", 2)
TLI_DEFINE_VECFUNC("llvm.cos.f64", "__cosd2_massv", 2)
TLI_DEFINE_VECFUNC("cosf", "__cosf4_massv", 4)
TLI_DEFINE_VECFUNC("llvm.cos.f32", "__cosf4_massv", 4)
TLI_DEFINE_VECFUNC("tan", "__tand2_massv", 2)
TLI_DEFINE_VECFUNC("tanf", "__tanf4_massv", 4)
TLI_DEFINE_VECFUNC("asin", "__asind2_massv", 2)
TLI_DEFINE_VECFUNC("asinf", "__asinf4_massv", 4)
TLI_DEFINE_VECFUNC("acos", "__acosd2_massv", 2)
TLI_DEFINE_VECFUNC("acosf", "__acosf4_massv", 4)
TLI_DEFINE_VECFUNC("atan", "__atand2_massv", 2)
TLI_DEFINE_VECFUNC("atanf", "__atanf4_massv", 4)
TLI_DEFINE_VECFUNC("atan2", "__atan2d2_massv", 2)
TLI_DEFINE_VECFUNC("atan2f", "__atan2f4_massv", 4)
// Hyperbolic Functions
TLI_DEFINE_VECFUNC("sinh", "__sinhd2_massv", 2)
TLI_DEFINE_VECFUNC("sinhf", "__sinhf4_massv", 4)
TLI_DEFINE_VECFUNC("cosh", "__coshd2_massv", 2)
TLI_DEFINE_VECFUNC("coshf", "__coshf4_massv", 4)
TLI_DEFINE_VECFUNC("tanh", "__tanhd2_massv", 2)
TLI_DEFINE_VECFUNC("tanhf", "__tanhf4_massv", 4)
TLI_DEFINE_VECFUNC("asinh", "__asinhd2_massv", 2)
TLI_DEFINE_VECFUNC("asinhf", "__asinhf4_massv", 4)
TLI_DEFINE_VECFUNC("acosh", "__acoshd2_massv", 2)
TLI_DEFINE_VECFUNC("acoshf", "__acoshf4_massv", 4)
TLI_DEFINE_VECFUNC("atanh", "__atanhd2_massv", 2)
TLI_DEFINE_VECFUNC("atanhf", "__atanhf4_massv", 4)
#elif defined(TLI_DEFINE_SVML_VECFUNCS)
// Intel SVML library's vector functions
@ -169,5 +245,6 @@ TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf16", 16)
#undef TLI_DEFINE_VECFUNC
#undef TLI_DEFINE_ACCELERATE_VECFUNCS
#undef TLI_DEFINE_MASSV_VECFUNCS
#undef TLI_DEFINE_SVML_VECFUNCS

View File

@ -23,6 +23,8 @@ static cl::opt<TargetLibraryInfoImpl::VectorLibrary> ClVectorLibrary(
"No vector functions library"),
clEnumValN(TargetLibraryInfoImpl::Accelerate, "Accelerate",
"Accelerate framework"),
clEnumValN(TargetLibraryInfoImpl::MASSV, "MASSV",
"IBM MASS vector library"),
clEnumValN(TargetLibraryInfoImpl::SVML, "SVML",
"Intel SVML library")));
@ -1537,6 +1539,14 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
addVectorizableFunctions(VecFuncs);
break;
}
case MASSV: {
// Defining TLI_DEFINE_MASSV_VECFUNCS right before the include selects the
// MASSV section of VecFuncs.def, so the array initializer below is filled
// with one {scalar name, vector name, VF} entry per TLI_DEFINE_VECFUNC line.
const VecDesc VecFuncs[] = {
#define TLI_DEFINE_MASSV_VECFUNCS
#include "llvm/Analysis/VecFuncs.def"
};
addVectorizableFunctions(VecFuncs);
break;
}
case SVML: {
const VecDesc VecFuncs[] = {
#define TLI_DEFINE_SVML_VECFUNCS

View File

@ -0,0 +1,106 @@
; RUN: opt -vector-library=MASSV -loop-vectorize -force-vector-interleave=1 -mattr=-altivec -S < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-unknown-linux-gnu"
declare double @cbrt(double) #0
declare float @cbrtf(float) #0
declare double @atanh(double) #0
declare float @atanhf(float) #0
; The RUN line disables AltiVec (-mattr=-altivec); MASSV is unsupported in that
; configuration. Check that massv entries are not generated.
define void @cbrt_f64(double* nocapture %varray) {
; CHECK-LABEL: @cbrt_f64(
; CHECK-NOT: __cbrtd2_massv{{.*}}<2 x double>
; CHECK: ret void
;
; Loop body: varray[i] = cbrt((double)(i32)i) for i = 0 .. 999.
start:
  br label %loop

loop:
  %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
  %idx.i32 = trunc i64 %idx to i32
  %arg = sitofp i32 %idx.i32 to double
  %res = tail call double @cbrt(double %arg)
  %slot = getelementptr inbounds double, double* %varray, i64 %idx
  store double %res, double* %slot, align 4
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 1000
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
define void @cbrt_f32(float* nocapture %varray) {
; CHECK-LABEL: @cbrt_f32(
; CHECK-NOT: __cbrtf4_massv{{.*}}<4 x float>
; CHECK: ret void
;
; Loop body: varray[i] = cbrtf((float)(i32)i) for i = 0 .. 999.
start:
  br label %loop

loop:
  %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
  %idx.i32 = trunc i64 %idx to i32
  %arg = sitofp i32 %idx.i32 to float
  %res = tail call float @cbrtf(float %arg)
  %slot = getelementptr inbounds float, float* %varray, i64 %idx
  store float %res, float* %slot, align 4
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 1000
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
define void @atanh_f64(double* nocapture %varray) {
; CHECK-LABEL: @atanh_f64(
; CHECK-NOT: __atanhd2_massv{{.*}}<2 x double>
; CHECK: ret void
;
; Loop body: varray[i] = atanh((double)(i32)i) for i = 0 .. 999.
start:
  br label %loop

loop:
  %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
  %idx.i32 = trunc i64 %idx to i32
  %arg = sitofp i32 %idx.i32 to double
  %res = tail call double @atanh(double %arg)
  %slot = getelementptr inbounds double, double* %varray, i64 %idx
  store double %res, double* %slot, align 4
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 1000
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
define void @atanh_f32(float* nocapture %varray) {
; CHECK-LABEL: @atanh_f32(
; CHECK-NOT: __atanhf4_massv{{.*}}<4 x float>
; CHECK: ret void
;
; Loop body: varray[i] = atanhf((float)(i32)i) for i = 0 .. 999.
; The float MASSV entry operates on <4 x float>, so the negative pattern above
; must name that type; matching on <2 x double> could never fire and would let
; this test pass vacuously.
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%tmp = trunc i64 %iv to i32
%conv = sitofp i32 %tmp to float
%call = tail call float @atanhf(float %conv)
%arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
store float %call, float* %arrayidx, align 4
%iv.next = add nuw nsw i64 %iv, 1
%exitcond = icmp eq i64 %iv.next, 1000
br i1 %exitcond, label %for.end, label %for.body
for.end:
ret void
}
attributes #0 = { nounwind }

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,56 @@
; RUN: opt -vector-library=MASSV -loop-vectorize -force-vector-interleave=1 -S < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-unknown-linux-gnu"
declare double @atanh(double) #1
declare float @atanhf(float) #1
; Check that functions marked as nobuiltin are not lowered to massv entries.
define void @atanh_f64(double* nocapture %varray) {
; CHECK-LABEL: @atanh_f64(
; CHECK-NOT: __atanhd2_massv{{.*}}<2 x double>
; CHECK: ret void
;
; Loop body: varray[i] = atanh((double)(i32)i) for i = 0 .. 999.
start:
  br label %loop

loop:
  %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
  %idx.i32 = trunc i64 %idx to i32
  %arg = sitofp i32 %idx.i32 to double
  %res = tail call double @atanh(double %arg)
  %slot = getelementptr inbounds double, double* %varray, i64 %idx
  store double %res, double* %slot, align 4
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 1000
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
define void @atanh_f32(float* nocapture %varray) {
; CHECK-LABEL: @atanh_f32(
; CHECK-NOT: __atanhf4_massv{{.*}}<4 x float>
; CHECK: ret void
;
; Loop body: varray[i] = atanhf((float)(i32)i) for i = 0 .. 999.
; The float MASSV entry operates on <4 x float>, so the negative pattern above
; must name that type; matching on <2 x double> could never fire and would let
; this test pass vacuously.
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%tmp = trunc i64 %iv to i32
%conv = sitofp i32 %tmp to float
%call = tail call float @atanhf(float %conv)
%arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
store float %call, float* %arrayidx, align 4
%iv.next = add nuw nsw i64 %iv, 1
%exitcond = icmp eq i64 %iv.next, 1000
br i1 %exitcond, label %for.end, label %for.body
for.end:
ret void
}
attributes #1 = { nobuiltin nounwind }

View File

@ -0,0 +1,108 @@
; RUN: opt -vector-library=MASSV -loop-vectorize -force-vector-interleave=1 -S < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-unknown-linux-gnu"
declare double @ceil(double) #0
declare float @fabsf(float) #0
declare double @llvm.sqrt.f64(double) #0
declare float @llvm.sqrt.f32(float) #0
; Vector counterpart of ceil is unsupported in MASSV library.
define void @ceil_f64(double* nocapture %varray) {
; CHECK-LABEL: @ceil_f64(
; CHECK-NOT: __ceild2_massv{{.*}}<2 x double>
; CHECK: ret void
;
; Loop body: varray[i] = ceil((double)(i32)i) for i = 0 .. 999.
start:
  br label %loop

loop:
  %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
  %idx.i32 = trunc i64 %idx to i32
  %arg = sitofp i32 %idx.i32 to double
  %res = tail call double @ceil(double %arg)
  %slot = getelementptr inbounds double, double* %varray, i64 %idx
  store double %res, double* %slot, align 4
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 1000
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
; Vector counterpart of fabs is unsupported in MASSV library.
define void @fabs_f32(float* nocapture %varray) {
; CHECK-LABEL: @fabs_f32(
; CHECK-NOT: __fabsf4_massv{{.*}}<4 x float>
; CHECK: ret void
;
; Loop body: varray[i] = fabsf((float)(i32)i) for i = 0 .. 999.
start:
  br label %loop

loop:
  %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
  %idx.i32 = trunc i64 %idx to i32
  %arg = sitofp i32 %idx.i32 to float
  %res = tail call float @fabsf(float %arg)
  %slot = getelementptr inbounds float, float* %varray, i64 %idx
  store float %res, float* %slot, align 4
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 1000
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
; sqrt intrinsics are converted to their vector counterpart intrinsics.
; They are not lowered to MASSV entries.
define void @sqrt_f64_intrinsic(double* nocapture %varray) {
; CHECK-LABEL: @sqrt_f64_intrinsic(
; CHECK: llvm.sqrt.v2f64{{.*}}<2 x double>
; CHECK: ret void
;
; Loop body: varray[i] = llvm.sqrt.f64((double)(i32)i) for i = 0 .. 999.
start:
  br label %loop

loop:
  %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
  %idx.i32 = trunc i64 %idx to i32
  %arg = sitofp i32 %idx.i32 to double
  %res = tail call double @llvm.sqrt.f64(double %arg)
  %slot = getelementptr inbounds double, double* %varray, i64 %idx
  store double %res, double* %slot, align 4
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 1000
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
define void @sqrt_f32_intrinsic(float* nocapture %varray) {
; CHECK-LABEL: @sqrt_f32_intrinsic(
; CHECK: llvm.sqrt.v4f32{{.*}}<4 x float>
; CHECK: ret void
;
; Loop body: varray[i] = llvm.sqrt.f32((float)(i32)i) for i = 0 .. 999.
start:
  br label %loop

loop:
  %idx = phi i64 [ 0, %start ], [ %idx.next, %loop ]
  %idx.i32 = trunc i64 %idx to i32
  %arg = sitofp i32 %idx.i32 to float
  %res = tail call float @llvm.sqrt.f32(float %arg)
  %slot = getelementptr inbounds float, float* %varray, i64 %idx
  store float %res, float* %slot, align 4
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 1000
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
attributes #0 = { nounwind }