[PowerPC] Add pwr7 and pwr10 support to IBM MASSV pass on AIX

Previously, MASSV only supported P8 and P9 on AIX and Linux. This patch adds
MASSV support for P7 and P10 as well, on AIX only.

Differential: https://reviews.llvm.org/D106678
This commit is contained in:
Masoud Ataei 2021-07-26 23:21:38 +00:00
parent f9f56488e0
commit 45951ad323
13 changed files with 608 additions and 474 deletions

View File

@ -203,72 +203,72 @@ TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVdN8v_logf", FIXED(8))
// IBM MASS library's vector Functions
// Floating-Point Arithmetic and Auxiliary Functions
TLI_DEFINE_VECFUNC("cbrt", "__cbrtd2_P8", FIXED(2))
TLI_DEFINE_VECFUNC("cbrtf", "__cbrtf4_P8", FIXED(4))
TLI_DEFINE_VECFUNC("pow", "__powd2_P8", FIXED(2))
TLI_DEFINE_VECFUNC("llvm.pow.f64", "__powd2_P8", FIXED(2))
TLI_DEFINE_VECFUNC("powf", "__powf4_P8", FIXED(4))
TLI_DEFINE_VECFUNC("llvm.pow.f32", "__powf4_P8", FIXED(4))
TLI_DEFINE_VECFUNC("cbrt", "__cbrtd2", FIXED(2))
TLI_DEFINE_VECFUNC("cbrtf", "__cbrtf4", FIXED(4))
TLI_DEFINE_VECFUNC("pow", "__powd2", FIXED(2))
TLI_DEFINE_VECFUNC("llvm.pow.f64", "__powd2", FIXED(2))
TLI_DEFINE_VECFUNC("powf", "__powf4", FIXED(4))
TLI_DEFINE_VECFUNC("llvm.pow.f32", "__powf4", FIXED(4))
// Exponential and Logarithmic Functions
TLI_DEFINE_VECFUNC("exp", "__expd2_P8", FIXED(2))
TLI_DEFINE_VECFUNC("llvm.exp.f64", "__expd2_P8", FIXED(2))
TLI_DEFINE_VECFUNC("expf", "__expf4_P8", FIXED(4))
TLI_DEFINE_VECFUNC("llvm.exp.f32", "__expf4_P8", FIXED(4))
TLI_DEFINE_VECFUNC("exp2", "__exp2d2_P8", FIXED(2))
TLI_DEFINE_VECFUNC("llvm.exp2.f64", "__exp2d2_P8", FIXED(2))
TLI_DEFINE_VECFUNC("exp2f", "__exp2f4_P8", FIXED(4))
TLI_DEFINE_VECFUNC("llvm.exp2.f32", "__exp2f4_P8", FIXED(4))
TLI_DEFINE_VECFUNC("expm1", "__expm1d2_P8", FIXED(2))
TLI_DEFINE_VECFUNC("expm1f", "__expm1f4_P8", FIXED(4))
TLI_DEFINE_VECFUNC("log", "__logd2_P8", FIXED(2))
TLI_DEFINE_VECFUNC("llvm.log.f64", "__logd2_P8", FIXED(2))
TLI_DEFINE_VECFUNC("logf", "__logf4_P8", FIXED(4))
TLI_DEFINE_VECFUNC("llvm.log.f32", "__logf4_P8", FIXED(4))
TLI_DEFINE_VECFUNC("log1p", "__log1pd2_P8", FIXED(2))
TLI_DEFINE_VECFUNC("log1pf", "__log1pf4_P8", FIXED(4))
TLI_DEFINE_VECFUNC("log10", "__log10d2_P8", FIXED(2))
TLI_DEFINE_VECFUNC("llvm.log10.f64", "__log10d2_P8", FIXED(2))
TLI_DEFINE_VECFUNC("log10f", "__log10f4_P8", FIXED(4))
TLI_DEFINE_VECFUNC("llvm.log10.f32", "__log10f4_P8", FIXED(4))
TLI_DEFINE_VECFUNC("log2", "__log2d2_P8", FIXED(2))
TLI_DEFINE_VECFUNC("llvm.log2.f64", "__log2d2_P8", FIXED(2))
TLI_DEFINE_VECFUNC("log2f", "__log2f4_P8", FIXED(4))
TLI_DEFINE_VECFUNC("llvm.log2.f32", "__log2f4_P8", FIXED(4))
TLI_DEFINE_VECFUNC("exp", "__expd2", FIXED(2))
TLI_DEFINE_VECFUNC("llvm.exp.f64", "__expd2", FIXED(2))
TLI_DEFINE_VECFUNC("expf", "__expf4", FIXED(4))
TLI_DEFINE_VECFUNC("llvm.exp.f32", "__expf4", FIXED(4))
TLI_DEFINE_VECFUNC("exp2", "__exp2d2", FIXED(2))
TLI_DEFINE_VECFUNC("llvm.exp2.f64", "__exp2d2", FIXED(2))
TLI_DEFINE_VECFUNC("exp2f", "__exp2f4", FIXED(4))
TLI_DEFINE_VECFUNC("llvm.exp2.f32", "__exp2f4", FIXED(4))
TLI_DEFINE_VECFUNC("expm1", "__expm1d2", FIXED(2))
TLI_DEFINE_VECFUNC("expm1f", "__expm1f4", FIXED(4))
TLI_DEFINE_VECFUNC("log", "__logd2", FIXED(2))
TLI_DEFINE_VECFUNC("llvm.log.f64", "__logd2", FIXED(2))
TLI_DEFINE_VECFUNC("logf", "__logf4", FIXED(4))
TLI_DEFINE_VECFUNC("llvm.log.f32", "__logf4", FIXED(4))
TLI_DEFINE_VECFUNC("log1p", "__log1pd2", FIXED(2))
TLI_DEFINE_VECFUNC("log1pf", "__log1pf4", FIXED(4))
TLI_DEFINE_VECFUNC("log10", "__log10d2", FIXED(2))
TLI_DEFINE_VECFUNC("llvm.log10.f64", "__log10d2", FIXED(2))
TLI_DEFINE_VECFUNC("log10f", "__log10f4", FIXED(4))
TLI_DEFINE_VECFUNC("llvm.log10.f32", "__log10f4", FIXED(4))
TLI_DEFINE_VECFUNC("log2", "__log2d2", FIXED(2))
TLI_DEFINE_VECFUNC("llvm.log2.f64", "__log2d2", FIXED(2))
TLI_DEFINE_VECFUNC("log2f", "__log2f4", FIXED(4))
TLI_DEFINE_VECFUNC("llvm.log2.f32", "__log2f4", FIXED(4))
// Trigonometric Functions
TLI_DEFINE_VECFUNC("sin", "__sind2_P8", FIXED(2))
TLI_DEFINE_VECFUNC("llvm.sin.f64", "__sind2_P8", FIXED(2))
TLI_DEFINE_VECFUNC("sinf", "__sinf4_P8", FIXED(4))
TLI_DEFINE_VECFUNC("llvm.sin.f32", "__sinf4_P8", FIXED(4))
TLI_DEFINE_VECFUNC("cos", "__cosd2_P8", FIXED(2))
TLI_DEFINE_VECFUNC("llvm.cos.f64", "__cosd2_P8", FIXED(2))
TLI_DEFINE_VECFUNC("cosf", "__cosf4_P8", FIXED(4))
TLI_DEFINE_VECFUNC("llvm.cos.f32", "__cosf4_P8", FIXED(4))
TLI_DEFINE_VECFUNC("tan", "__tand2_P8", FIXED(2))
TLI_DEFINE_VECFUNC("tanf", "__tanf4_P8", FIXED(4))
TLI_DEFINE_VECFUNC("asin", "__asind2_P8", FIXED(2))
TLI_DEFINE_VECFUNC("asinf", "__asinf4_P8", FIXED(4))
TLI_DEFINE_VECFUNC("acos", "__acosd2_P8", FIXED(2))
TLI_DEFINE_VECFUNC("acosf", "__acosf4_P8", FIXED(4))
TLI_DEFINE_VECFUNC("atan", "__atand2_P8", FIXED(2))
TLI_DEFINE_VECFUNC("atanf", "__atanf4_P8", FIXED(4))
TLI_DEFINE_VECFUNC("atan2", "__atan2d2_P8", FIXED(2))
TLI_DEFINE_VECFUNC("atan2f", "__atan2f4_P8", FIXED(4))
TLI_DEFINE_VECFUNC("sin", "__sind2", FIXED(2))
TLI_DEFINE_VECFUNC("llvm.sin.f64", "__sind2", FIXED(2))
TLI_DEFINE_VECFUNC("sinf", "__sinf4", FIXED(4))
TLI_DEFINE_VECFUNC("llvm.sin.f32", "__sinf4", FIXED(4))
TLI_DEFINE_VECFUNC("cos", "__cosd2", FIXED(2))
TLI_DEFINE_VECFUNC("llvm.cos.f64", "__cosd2", FIXED(2))
TLI_DEFINE_VECFUNC("cosf", "__cosf4", FIXED(4))
TLI_DEFINE_VECFUNC("llvm.cos.f32", "__cosf4", FIXED(4))
TLI_DEFINE_VECFUNC("tan", "__tand2", FIXED(2))
TLI_DEFINE_VECFUNC("tanf", "__tanf4", FIXED(4))
TLI_DEFINE_VECFUNC("asin", "__asind2", FIXED(2))
TLI_DEFINE_VECFUNC("asinf", "__asinf4", FIXED(4))
TLI_DEFINE_VECFUNC("acos", "__acosd2", FIXED(2))
TLI_DEFINE_VECFUNC("acosf", "__acosf4", FIXED(4))
TLI_DEFINE_VECFUNC("atan", "__atand2", FIXED(2))
TLI_DEFINE_VECFUNC("atanf", "__atanf4", FIXED(4))
TLI_DEFINE_VECFUNC("atan2", "__atan2d2", FIXED(2))
TLI_DEFINE_VECFUNC("atan2f", "__atan2f4", FIXED(4))
// Hyperbolic Functions
TLI_DEFINE_VECFUNC("sinh", "__sinhd2_P8", FIXED(2))
TLI_DEFINE_VECFUNC("sinhf", "__sinhf4_P8", FIXED(4))
TLI_DEFINE_VECFUNC("cosh", "__coshd2_P8", FIXED(2))
TLI_DEFINE_VECFUNC("coshf", "__coshf4_P8", FIXED(4))
TLI_DEFINE_VECFUNC("tanh", "__tanhd2_P8", FIXED(2))
TLI_DEFINE_VECFUNC("tanhf", "__tanhf4_P8", FIXED(4))
TLI_DEFINE_VECFUNC("asinh", "__asinhd2_P8", FIXED(2))
TLI_DEFINE_VECFUNC("asinhf", "__asinhf4_P8", FIXED(4))
TLI_DEFINE_VECFUNC("acosh", "__acoshd2_P8", FIXED(2))
TLI_DEFINE_VECFUNC("acoshf", "__acoshf4_P8", FIXED(4))
TLI_DEFINE_VECFUNC("atanh", "__atanhd2_P8", FIXED(2))
TLI_DEFINE_VECFUNC("atanhf", "__atanhf4_P8", FIXED(4))
TLI_DEFINE_VECFUNC("sinh", "__sinhd2", FIXED(2))
TLI_DEFINE_VECFUNC("sinhf", "__sinhf4", FIXED(4))
TLI_DEFINE_VECFUNC("cosh", "__coshd2", FIXED(2))
TLI_DEFINE_VECFUNC("coshf", "__coshf4", FIXED(4))
TLI_DEFINE_VECFUNC("tanh", "__tanhd2", FIXED(2))
TLI_DEFINE_VECFUNC("tanhf", "__tanhf4", FIXED(4))
TLI_DEFINE_VECFUNC("asinh", "__asinhd2", FIXED(2))
TLI_DEFINE_VECFUNC("asinhf", "__asinhf4", FIXED(4))
TLI_DEFINE_VECFUNC("acosh", "__acoshd2", FIXED(2))
TLI_DEFINE_VECFUNC("acoshf", "__acoshf4", FIXED(4))
TLI_DEFINE_VECFUNC("atanh", "__atanhd2", FIXED(2))
TLI_DEFINE_VECFUNC("atanhf", "__atanhf4", FIXED(4))
#elif defined(TLI_DEFINE_SVML_VECFUNCS)

View File

@ -28,9 +28,6 @@ using namespace llvm;
namespace {
// Length of the suffix "massv", which is specific to IBM MASSV library entries.
const unsigned MASSVSuffixLength = 2;
static StringRef MASSVFuncs[] = {
#define TLI_DEFINE_MASSV_VECFUNCS_NAMES
#include "llvm/Analysis/VecFuncs.def"
@ -70,21 +67,26 @@ bool PPCLowerMASSVEntries::isMASSVFunc(StringRef Name) {
// FIXME:
/// Returns a string corresponding to the specified PowerPC subtarget. e.g.:
/// "P8" for Power8, "P9" for Power9. The string is used as a suffix while
/// "_P8" for Power8, "_P9" for Power9. The string is used as a suffix while
/// generating subtarget-specific MASSV library functions. Current support
/// includes Power8 and Power9 subtargets.
/// includes minimum subtarget Power8 for Linux and Power7 for AIX.
StringRef PPCLowerMASSVEntries::getCPUSuffix(const PPCSubtarget *Subtarget) {
// Assume Power8 when Subtarget is unavailable.
// Assume generic when Subtarget is unavailable.
if (!Subtarget)
return "P8";
return "";
// TODO: add _P10 entries to Linux MASS lib and remove the check for AIX
if (Subtarget->isAIXABI() && Subtarget->hasP10Vector())
return "_P10";
if (Subtarget->hasP9Vector())
return "P9";
return "_P9";
if (Subtarget->hasP8Vector())
return "P8";
return "_P8";
if (Subtarget->isAIXABI())
return "_P7";
report_fatal_error(
"-vector-library=MASSV option is supported only on Power8 and later "
"subtargets when vectorization is not disabled.");
"Mininum subtarget for -vector-library=MASSV option is Power8 on Linux "
"and Power7 on AIX when vectorization is not disabled.");
}
/// Creates PowerPC subtarget-specific name corresponding to the specified
@ -93,7 +95,7 @@ std::string
PPCLowerMASSVEntries::createMASSVFuncName(Function &Func,
const PPCSubtarget *Subtarget) {
StringRef Suffix = getCPUSuffix(Subtarget);
auto GenericName = Func.getName().drop_back(MASSVSuffixLength).str();
auto GenericName = Func.getName().str();
std::string MASSVEntryName = GenericName + Suffix.str();
return MASSVEntryName;
}
@ -102,7 +104,7 @@ PPCLowerMASSVEntries::createMASSVFuncName(Function &Func,
/// intrinsics when the exponent is 0.25 or 0.75.
bool PPCLowerMASSVEntries::handlePowSpecialCases(CallInst *CI, Function &Func,
Module &M) {
if (Func.getName() != "__powf4_P8" && Func.getName() != "__powd2_P8")
if (Func.getName() != "__powf4" && Func.getName() != "__powd2")
return false;
if (Constant *Exp = dyn_cast<Constant>(CI->getArgOperand(1)))

View File

@ -47,7 +47,7 @@ define <4 x float> @exp_f32(<4 x float> %in) {
;
; MASSV-LABEL: define {{[^@]+}}@exp_f32
; MASSV-SAME: (<4 x float> [[IN:%.*]]) {
; MASSV-NEXT: [[TMP1:%.*]] = call <4 x float> @__expf4_P8(<4 x float> [[IN]])
; MASSV-NEXT: [[TMP1:%.*]] = call <4 x float> @__expf4(<4 x float> [[IN]])
; MASSV-NEXT: ret <4 x float> [[TMP1]]
;
; ACCELERATE-LABEL: define {{[^@]+}}@exp_f32

View File

@ -1,8 +1,8 @@
; RUN: llc -verify-machineinstrs -mcpu=pwr9 < %s -mtriple=powerpc64le-unknown-linux-gnu -mattr=-power9-vector | FileCheck -check-prefixes=CHECK-PWR8,CHECK-ALL %s
; RUN: llc -verify-machineinstrs -mcpu=pwr8 < %s -mtriple=powerpc64le-unknown-linux-gnu -mattr=+power9-vector | FileCheck -check-prefixes=CHECK-PWR9,CHECK-ALL %s
declare <2 x double> @__cbrtd2_P8(<2 x double>)
declare <4 x float> @__cbrtf4_P8(<4 x float>)
declare <2 x double> @__cbrtd2(<2 x double>)
declare <4 x float> @__cbrtf4(<4 x float>)
; cbrt without the power9-vector attribute on the caller
; check massv calls are correctly targeted for Power8
@ -12,7 +12,7 @@ define <2 x double> @cbrt_f64_massv_nopwr9(<2 x double> %opnd) #0 {
; CHECK-NOT: bl __cbrtd2_massv
; CHECK-ALL: blr
;
%1 = call <2 x double> @__cbrtd2_P8(<2 x double> %opnd)
%1 = call <2 x double> @__cbrtd2(<2 x double> %opnd)
ret <2 x double> %1
}
@ -25,6 +25,6 @@ define <2 x double> @cbrt_f64_massv_pwr9(<2 x double> %opnd) #1 {
; CHECK-NOT: bl __cbrtd2_P8
; CHECK-ALL: blr
;
%1 = call <2 x double> @__cbrtd2_P8(<2 x double> %opnd)
%1 = call <2 x double> @__cbrtd2(<2 x double> %opnd)
ret <2 x double> %1
}

View File

@ -1,577 +1,671 @@
; RUN: llc -verify-machineinstrs -mcpu=pwr9 < %s -mtriple=powerpc64le-unknown-linux-gnu | FileCheck -check-prefixes=CHECK-PWR9,CHECK-ALL %s
; RUN: llc -verify-machineinstrs -mcpu=pwr8 < %s -mtriple=powerpc64le-unknown-linux-gnu | FileCheck -check-prefixes=CHECK-PWR8,CHECK-ALL %s
; RUN: llc -verify-machineinstrs -mcpu=pwr8 < %s -mtriple=powerpc64le-unknown-linux-gnu | FileCheck --check-prefix=CHECK-ALL %s
; RUN: llc -verify-machineinstrs -mcpu=pwr10 < %s -mtriple=powerpc64le-unknown-linux-gnu | FileCheck -check-prefixes=CHECK-PWR9,CHECK-ALL %s
; RUN: llc -verify-machineinstrs -mcpu=pwr9 < %s -mtriple=powerpc64le-unknown-linux-gnu | FileCheck -check-prefixes=CHECK-PWR9,CHECK-ALL %s
; RUN: llc -verify-machineinstrs -mcpu=pwr8 < %s -mtriple=powerpc64le-unknown-linux-gnu | FileCheck -check-prefixes=CHECK-PWR8,CHECK-ALL %s
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu | FileCheck -check-prefixes=CHECK-PWR8,CHECK-ALL %s
; RUN: llc -verify-machineinstrs -mcpu=pwr10 < %s -mtriple=powerpc-ibm-aix-xcoff | FileCheck -check-prefixes=CHECK-PWR10,CHECK-ALL %s
; RUN: llc -verify-machineinstrs -mcpu=pwr9 < %s -mtriple=powerpc-ibm-aix-xcoff | FileCheck -check-prefixes=CHECK-PWR9,CHECK-ALL %s
; RUN: llc -verify-machineinstrs -mcpu=pwr8 < %s -mtriple=powerpc-ibm-aix-xcoff | FileCheck -check-prefixes=CHECK-PWR8,CHECK-ALL %s
; RUN: llc -verify-machineinstrs -mcpu=pwr7 < %s -mtriple=powerpc-ibm-aix-xcoff | FileCheck -check-prefixes=CHECK-PWR7,CHECK-ALL %s
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc-ibm-aix-xcoff | FileCheck -check-prefixes=CHECK-PWR7,CHECK-ALL %s
declare <2 x double> @__cbrtd2_P8(<2 x double>)
declare <4 x float> @__cbrtf4_P8(<4 x float>)
declare <2 x double> @__cbrtd2(<2 x double>)
declare <4 x float> @__cbrtf4(<4 x float>)
declare <2 x double> @__powd2_P8(<2 x double>, <2 x double>)
declare <4 x float> @__powf4_P8(<4 x float>, <4 x float>)
declare <2 x double> @__powd2(<2 x double>, <2 x double>)
declare <4 x float> @__powf4(<4 x float>, <4 x float>)
declare <2 x double> @__expd2_P8(<2 x double>)
declare <4 x float> @__expf4_P8(<4 x float>)
declare <2 x double> @__expd2(<2 x double>)
declare <4 x float> @__expf4(<4 x float>)
declare <2 x double> @__exp2d2_P8(<2 x double>)
declare <4 x float> @__exp2f4_P8(<4 x float>)
declare <2 x double> @__exp2d2(<2 x double>)
declare <4 x float> @__exp2f4(<4 x float>)
declare <2 x double> @__expm1d2_P8(<2 x double>)
declare <4 x float> @__expm1f4_P8(<4 x float>)
declare <2 x double> @__expm1d2(<2 x double>)
declare <4 x float> @__expm1f4(<4 x float>)
declare <2 x double> @__logd2_P8(<2 x double>)
declare <4 x float> @__logf4_P8(<4 x float>)
declare <2 x double> @__logd2(<2 x double>)
declare <4 x float> @__logf4(<4 x float>)
declare <2 x double> @__log1pd2_P8(<2 x double>)
declare <4 x float> @__log1pf4_P8(<4 x float>)
declare <2 x double> @__log1pd2(<2 x double>)
declare <4 x float> @__log1pf4(<4 x float>)
declare <2 x double> @__log10d2_P8(<2 x double>)
declare <4 x float> @__log10f4_P8(<4 x float>)
declare <2 x double> @__log10d2(<2 x double>)
declare <4 x float> @__log10f4(<4 x float>)
declare <2 x double> @__log2d2_P8(<2 x double>)
declare <4 x float> @__log2f4_P8(<4 x float>)
declare <2 x double> @__log2d2(<2 x double>)
declare <4 x float> @__log2f4(<4 x float>)
declare <2 x double> @__sind2_P8(<2 x double>)
declare <4 x float> @__sinf4_P8(<4 x float>)
declare <2 x double> @__sind2(<2 x double>)
declare <4 x float> @__sinf4(<4 x float>)
declare <2 x double> @__cosd2_P8(<2 x double>)
declare <4 x float> @__cosf4_P8(<4 x float>)
declare <2 x double> @__cosd2(<2 x double>)
declare <4 x float> @__cosf4(<4 x float>)
declare <2 x double> @__tand2_P8(<2 x double>)
declare <4 x float> @__tanf4_P8(<4 x float>)
declare <2 x double> @__tand2(<2 x double>)
declare <4 x float> @__tanf4(<4 x float>)
declare <2 x double> @__asind2_P8(<2 x double>)
declare <4 x float> @__asinf4_P8(<4 x float>)
declare <2 x double> @__asind2(<2 x double>)
declare <4 x float> @__asinf4(<4 x float>)
declare <2 x double> @__acosd2_P8(<2 x double>)
declare <4 x float> @__acosf4_P8(<4 x float>)
declare <2 x double> @__acosd2(<2 x double>)
declare <4 x float> @__acosf4(<4 x float>)
declare <2 x double> @__atand2_P8(<2 x double>)
declare <4 x float> @__atanf4_P8(<4 x float>)
declare <2 x double> @__atand2(<2 x double>)
declare <4 x float> @__atanf4(<4 x float>)
declare <2 x double> @__atan2d2_P8(<2 x double>)
declare <4 x float> @__atan2f4_P8(<4 x float>)
declare <2 x double> @__atan2d2(<2 x double>)
declare <4 x float> @__atan2f4(<4 x float>)
declare <2 x double> @__sinhd2_P8(<2 x double>)
declare <4 x float> @__sinhf4_P8(<4 x float>)
declare <2 x double> @__sinhd2(<2 x double>)
declare <4 x float> @__sinhf4(<4 x float>)
declare <2 x double> @__coshd2_P8(<2 x double>)
declare <4 x float> @__coshf4_P8(<4 x float>)
declare <2 x double> @__coshd2(<2 x double>)
declare <4 x float> @__coshf4(<4 x float>)
declare <2 x double> @__tanhd2_P8(<2 x double>)
declare <4 x float> @__tanhf4_P8(<4 x float>)
declare <2 x double> @__tanhd2(<2 x double>)
declare <4 x float> @__tanhf4(<4 x float>)
declare <2 x double> @__asinhd2_P8(<2 x double>)
declare <4 x float> @__asinhf4_P8(<4 x float>)
declare <2 x double> @__asinhd2(<2 x double>)
declare <4 x float> @__asinhf4(<4 x float>)
declare <2 x double> @__acoshd2_P8(<2 x double>)
declare <4 x float> @__acoshf4_P8(<4 x float>)
declare <2 x double> @__acoshd2(<2 x double>)
declare <4 x float> @__acoshf4(<4 x float>)
declare <2 x double> @__atanhd2_P8(<2 x double>)
declare <4 x float> @__atanhf4_P8(<4 x float>)
declare <2 x double> @__atanhd2(<2 x double>)
declare <4 x float> @__atanhf4(<4 x float>)
; following tests check generation of subtarget-specific calls
; cbrt
define <2 x double> @cbrt_f64_massv(<2 x double> %opnd) {
; CHECK-ALL-LABEL: @cbrt_f64_massv
; CHECK-PWR9: bl __cbrtd2_P9
; CHECK-PWR8: bl __cbrtd2_P8
; CHECK-NOT: bl __cbrtd2_massv
; CHECK-ALL: blr
; CHECK-PWR10: __cbrtd2_P10
; CHECK-PWR9: __cbrtd2_P9
; CHECK-PWR8: __cbrtd2_P8
; CHECK-PWR7: __cbrtd2_P7
; CHECK-NOT: __cbrtd2_massv
; CHECK-ALL: blr
;
%1 = call <2 x double> @__cbrtd2_P8(<2 x double> %opnd)
%1 = call <2 x double> @__cbrtd2(<2 x double> %opnd)
ret <2 x double> %1
}
define <4 x float> @cbrt_f32_massv(<4 x float> %opnd) {
; CHECK-ALL-LABEL: @cbrt_f32_massv
; CHECK-PWR9: bl __cbrtf4_P9
; CHECK-PWR8: bl __cbrtf4_P8
; CHECK-NOT: bl __cbrtf4_massv
; CHECK-ALL: blr
; CHECK-PWR10: __cbrtf4_P10
; CHECK-PWR9: __cbrtf4_P9
; CHECK-PWR8: __cbrtf4_P8
; CHECK-PWR7: __cbrtf4_P7
; CHECK-NOT: __cbrtf4_massv
; CHECK-ALL: blr
;
%1 = call <4 x float> @__cbrtf4_P8(<4 x float> %opnd)
%1 = call <4 x float> @__cbrtf4(<4 x float> %opnd)
ret <4 x float> %1
}
; pow
define <2 x double> @pow_f64_massv(<2 x double> %opnd1, <2 x double> %opnd2) {
; CHECK-ALL-LABEL: @pow_f64_massv
; CHECK-PWR9: bl __powd2_P9
; CHECK-PWR8: bl __powd2_P8
; CHECK-NOT: bl __powd2_massv
; CHECK-ALL: blr
; CHECK-PWR10: __powd2_P10
; CHECK-PWR9: __powd2_P9
; CHECK-PWR8: __powd2_P8
; CHECK-PWR7: __powd2_P7
; CHECK-NOT: __powd2_massv
; CHECK-ALL: blr
;
%1 = call <2 x double> @__powd2_P8(<2 x double> %opnd1, <2 x double> %opnd2)
%1 = call <2 x double> @__powd2(<2 x double> %opnd1, <2 x double> %opnd2)
ret <2 x double> %1
}
define <4 x float> @pow_f32_massv(<4 x float> %opnd1, <4 x float> %opnd2) {
; CHECK-ALL-LABEL: @pow_f32_massv
; CHECK-PWR9: bl __powf4_P9
; CHECK-PWR8: bl __powf4_P8
; CHECK-NOT: bl __powf4_massv
; CHECK-ALL: blr
; CHECK-PWR10: __powf4_P10
; CHECK-PWR9: __powf4_P9
; CHECK-PWR8: __powf4_P8
; CHECK-PWR7: __powf4_P7
; CHECK-NOT: __powf4_massv
; CHECK-ALL: blr
;
%1 = call <4 x float> @__powf4_P8(<4 x float> %opnd1, <4 x float> %opnd2)
%1 = call <4 x float> @__powf4(<4 x float> %opnd1, <4 x float> %opnd2)
ret <4 x float> %1
}
; exp
define <2 x double> @exp_f64_massv(<2 x double> %opnd) {
; CHECK-ALL-LABEL: @exp_f64_massv
; CHECK-PWR9: bl __expd2_P9
; CHECK-PWR8: bl __expd2_P8
; CHECK-NOT: bl __expd2_massv
; CHECK-ALL: blr
; CHECK-PWR10: __expd2_P10
; CHECK-PWR9: __expd2_P9
; CHECK-PWR8: __expd2_P8
; CHECK-PWR7: __expd2_P7
; CHECK-NOT: __expd2_massv
; CHECK-ALL: blr
;
%1 = call <2 x double> @__expd2_P8(<2 x double> %opnd)
%1 = call <2 x double> @__expd2(<2 x double> %opnd)
ret <2 x double> %1
}
define <4 x float> @exp_f32_massv(<4 x float> %opnd) {
; CHECK-ALL-LABEL: @exp_f32_massv
; CHECK-PWR9: bl __expf4_P9
; CHECK-PWR8: bl __expf4_P8
; CHECK-NOT: bl __expf4_massv
; CHECK-ALL: blr
; CHECK-PWR10: __expf4_P10
; CHECK-PWR9: __expf4_P9
; CHECK-PWR8: __expf4_P8
; CHECK-PWR7: __expf4_P7
; CHECK-NOT: __expf4_massv
; CHECK-ALL: blr
;
%1 = call <4 x float> @__expf4_P8(<4 x float> %opnd)
%1 = call <4 x float> @__expf4(<4 x float> %opnd)
ret <4 x float> %1
}
; exp2
define <2 x double> @exp2_f64_massv(<2 x double> %opnd) {
; CHECK-ALL-LABEL: @exp2_f64_massv
; CHECK-PWR9: bl __exp2d2_P9
; CHECK-PWR8: bl __exp2d2_P8
; CHECK-NOT: bl __exp2d2_massv
; CHECK-ALL: blr
; CHECK-PWR10: __exp2d2_P10
; CHECK-PWR9: __exp2d2_P9
; CHECK-PWR8: __exp2d2_P8
; CHECK-PWR7: __exp2d2_P7
; CHECK-NOT: __exp2d2_massv
; CHECK-ALL: blr
;
%1 = call <2 x double> @__exp2d2_P8(<2 x double> %opnd)
%1 = call <2 x double> @__exp2d2(<2 x double> %opnd)
ret <2 x double> %1
}
define <4 x float> @exp2_f32_massv(<4 x float> %opnd) {
; CHECK-ALL-LABEL: @exp2_f32_massv
; CHECK-PWR9: bl __exp2f4_P9
; CHECK-PWR8: bl __exp2f4_P8
; CHECK-NOT: bl __exp2f4_massv
; CHECK-ALL: blr
; CHECK-PWR10: __exp2f4_P10
; CHECK-PWR9: __exp2f4_P9
; CHECK-PWR8: __exp2f4_P8
; CHECK-PWR7: __exp2f4_P7
; CHECK-NOT: __exp2f4_massv
; CHECK-ALL: blr
;
%1 = call <4 x float> @__exp2f4_P8(<4 x float> %opnd)
%1 = call <4 x float> @__exp2f4(<4 x float> %opnd)
ret <4 x float> %1
}
; expm1
define <2 x double> @expm1_f64_massv(<2 x double> %opnd) {
; CHECK-ALL-LABEL: @expm1_f64_massv
; CHECK-PWR9: bl __expm1d2_P9
; CHECK-PWR8: bl __expm1d2_P8
; CHECK-NOT: bl __expm1d2_massv
; CHECK-ALL: blr
; CHECK-PWR10: __expm1d2_P10
; CHECK-PWR9: __expm1d2_P9
; CHECK-PWR8: __expm1d2_P8
; CHECK-PWR7: __expm1d2_P7
; CHECK-NOT: __expm1d2_massv
; CHECK-ALL: blr
;
%1 = call <2 x double> @__expm1d2_P8(<2 x double> %opnd)
%1 = call <2 x double> @__expm1d2(<2 x double> %opnd)
ret <2 x double> %1
}
define <4 x float> @expm1_f32_massv(<4 x float> %opnd) {
; CHECK-ALL-LABEL: @expm1_f32_massv
; CHECK-PWR9: bl __expm1f4_P9
; CHECK-PWR8: bl __expm1f4_P8
; CHECK-NOT: bl __expm1f4_massv
; CHECK-ALL: blr
; CHECK-PWR10: __expm1f4_P10
; CHECK-PWR9: __expm1f4_P9
; CHECK-PWR8: __expm1f4_P8
; CHECK-PWR7: __expm1f4_P7
; CHECK-NOT: __expm1f4_massv
; CHECK-ALL: blr
;
%1 = call <4 x float> @__expm1f4_P8(<4 x float> %opnd)
%1 = call <4 x float> @__expm1f4(<4 x float> %opnd)
ret <4 x float> %1
}
; log
define <2 x double> @log_f64_massv(<2 x double> %opnd) {
; CHECK-ALL-LABEL: @log_f64_massv
; CHECK-PWR9: bl __logd2_P9
; CHECK-PWR8: bl __logd2_P8
; CHECK-NOT: bl __logd2_massv
; CHECK-ALL: blr
; CHECK-PWR10: __logd2_P10
; CHECK-PWR9: __logd2_P9
; CHECK-PWR8: __logd2_P8
; CHECK-PWR7: __logd2_P7
; CHECK-NOT: __logd2_massv
; CHECK-ALL: blr
;
%1 = call <2 x double> @__logd2_P8(<2 x double> %opnd)
%1 = call <2 x double> @__logd2(<2 x double> %opnd)
ret <2 x double> %1
}
define <4 x float> @log_f32_massv(<4 x float> %opnd) {
; CHECK-ALL-LABEL: @log_f32_massv
; CHECK-PWR9: bl __logf4_P9
; CHECK-PWR8: bl __logf4_P8
; CHECK-NOT: bl __logf4_massv
; CHECK-ALL: blr
; CHECK-PWR10: __logf4_P10
; CHECK-PWR9: __logf4_P9
; CHECK-PWR8: __logf4_P8
; CHECK-PWR7: __logf4_P7
; CHECK-NOT: __logf4_massv
; CHECK-ALL: blr
;
%1 = call <4 x float> @__logf4_P8(<4 x float> %opnd)
%1 = call <4 x float> @__logf4(<4 x float> %opnd)
ret <4 x float> %1
}
; log1p
define <2 x double> @log1p_f64_massv(<2 x double> %opnd) {
; CHECK-ALL-LABEL: @log1p_f64_massv
; CHECK-PWR9: bl __log1pd2_P9
; CHECK-PWR8: bl __log1pd2_P8
; CHECK-NOT: bl __log1pd2_massv
; CHECK-ALL: blr
; CHECK-PWR10: __log1pd2_P10
; CHECK-PWR9: __log1pd2_P9
; CHECK-PWR8: __log1pd2_P8
; CHECK-PWR7: __log1pd2_P7
; CHECK-NOT: __log1pd2_massv
; CHECK-ALL: blr
;
%1 = call <2 x double> @__log1pd2_P8(<2 x double> %opnd)
%1 = call <2 x double> @__log1pd2(<2 x double> %opnd)
ret <2 x double> %1
}
define <4 x float> @log1p_f32_massv(<4 x float> %opnd) {
; CHECK-ALL-LABEL: @log1p_f32_massv
; CHECK-PWR9: bl __log1pf4_P9
; CHECK-PWR8: bl __log1pf4_P8
; CHECK-NOT: bl __log1pf4_massv
; CHECK-ALL: blr
; CHECK-PWR10: __log1pf4_P10
; CHECK-PWR9: __log1pf4_P9
; CHECK-PWR8: __log1pf4_P8
; CHECK-PWR7: __log1pf4_P7
; CHECK-NOT: __log1pf4_massv
; CHECK-ALL: blr
;
%1 = call <4 x float> @__log1pf4_P8(<4 x float> %opnd)
%1 = call <4 x float> @__log1pf4(<4 x float> %opnd)
ret <4 x float> %1
}
; log10
define <2 x double> @log10_f64_massv(<2 x double> %opnd) {
; CHECK-ALL-LABEL: @log10_f64_massv
; CHECK-PWR9: bl __log10d2_P9
; CHECK-PWR8: bl __log10d2_P8
; CHECK-NOT: bl __log10d2_massv
; CHECK-ALL: blr
; CHECK-PWR10: __log10d2_P10
; CHECK-PWR9: __log10d2_P9
; CHECK-PWR8: __log10d2_P8
; CHECK-PWR7: __log10d2_P7
; CHECK-NOT: __log10d2_massv
; CHECK-ALL: blr
;
%1 = call <2 x double> @__log10d2_P8(<2 x double> %opnd)
%1 = call <2 x double> @__log10d2(<2 x double> %opnd)
ret <2 x double> %1
}
define <4 x float> @log10_f32_massv(<4 x float> %opnd) {
; CHECK-ALL-LABEL: @log10_f32_massv
; CHECK-PWR9: bl __log10f4_P9
; CHECK-PWR8: bl __log10f4_P8
; CHECK-NOT: bl __log10f4_massv
; CHECK-ALL: blr
; CHECK-PWR10: __log10f4_P10
; CHECK-PWR9: __log10f4_P9
; CHECK-PWR8: __log10f4_P8
; CHECK-PWR7: __log10f4_P7
; CHECK-NOT: __log10f4_massv
; CHECK-ALL: blr
;
%1 = call <4 x float> @__log10f4_P8(<4 x float> %opnd)
%1 = call <4 x float> @__log10f4(<4 x float> %opnd)
ret <4 x float> %1
}
; log2
define <2 x double> @log2_f64_massv(<2 x double> %opnd) {
; CHECK-ALL-LABEL: @log2_f64_massv
; CHECK-PWR9: bl __log2d2_P9
; CHECK-PWR8: bl __log2d2_P8
; CHECK-NOT: bl __log2d2_massv
; CHECK-ALL: blr
; CHECK-PWR10: __log2d2_P10
; CHECK-PWR9: __log2d2_P9
; CHECK-PWR8: __log2d2_P8
; CHECK-PWR7: __log2d2_P7
; CHECK-NOT: __log2d2_massv
; CHECK-ALL: blr
;
%1 = call <2 x double> @__log2d2_P8(<2 x double> %opnd)
%1 = call <2 x double> @__log2d2(<2 x double> %opnd)
ret <2 x double> %1
}
define <4 x float> @log2_f32_massv(<4 x float> %opnd) {
; CHECK-ALL-LABEL: @log2_f32_massv
; CHECK-PWR9: bl __log2f4_P9
; CHECK-PWR8: bl __log2f4_P8
; CHECK-NOT: bl __log2f4_massv
; CHECK-ALL: blr
; CHECK-PWR10: __log2f4_P10
; CHECK-PWR9: __log2f4_P9
; CHECK-PWR8: __log2f4_P8
; CHECK-PWR7: __log2f4_P7
; CHECK-NOT: __log2f4_massv
; CHECK-ALL: blr
;
%1 = call <4 x float> @__log2f4_P8(<4 x float> %opnd)
%1 = call <4 x float> @__log2f4(<4 x float> %opnd)
ret <4 x float> %1
}
; sin
define <2 x double> @sin_f64_massv(<2 x double> %opnd) {
; CHECK-ALL-LABEL: @sin_f64_massv
; CHECK-PWR9: bl __sind2_P9
; CHECK-PWR8: bl __sind2_P8
; CHECK-NOT: bl __sind2_massv
; CHECK-ALL: blr
; CHECK-PWR10: __sind2_P10
; CHECK-PWR9: __sind2_P9
; CHECK-PWR8: __sind2_P8
; CHECK-PWR7: __sind2_P7
; CHECK-NOT: __sind2_massv
; CHECK-ALL: blr
;
%1 = call <2 x double> @__sind2_P8(<2 x double> %opnd)
%1 = call <2 x double> @__sind2(<2 x double> %opnd)
ret <2 x double> %1
}
define <4 x float> @sin_f32_massv(<4 x float> %opnd) {
; CHECK-ALL-LABEL: @sin_f32_massv
; CHECK-PWR9: bl __sinf4_P9
; CHECK-PWR8: bl __sinf4_P8
; CHECK-NOT: bl __sinf4_massv
; CHECK-ALL: blr
; CHECK-PWR10: __sinf4_P10
; CHECK-PWR9: __sinf4_P9
; CHECK-PWR8: __sinf4_P8
; CHECK-PWR7: __sinf4_P7
; CHECK-NOT: __sinf4_massv
; CHECK-ALL: blr
;
%1 = call <4 x float> @__sinf4_P8(<4 x float> %opnd)
%1 = call <4 x float> @__sinf4(<4 x float> %opnd)
ret <4 x float> %1
}
; cos
define <2 x double> @cos_f64_massv(<2 x double> %opnd) {
; CHECK-ALL-LABEL: @cos_f64_massv
; CHECK-PWR9: bl __cosd2_P9
; CHECK-PWR8: bl __cosd2_P8
; CHECK-NOT: bl __cosd2_massv
; CHECK-ALL: blr
; CHECK-PWR10: __cosd2_P10
; CHECK-PWR9: __cosd2_P9
; CHECK-PWR8: __cosd2_P8
; CHECK-PWR7: __cosd2_P7
; CHECK-NOT: __cosd2_massv
; CHECK-ALL: blr
;
%1 = call <2 x double> @__cosd2_P8(<2 x double> %opnd)
%1 = call <2 x double> @__cosd2(<2 x double> %opnd)
ret <2 x double> %1
}
define <4 x float> @cos_f32_massv(<4 x float> %opnd) {
; CHECK-ALL-LABEL: @cos_f32_massv
; CHECK-PWR9: bl __cosf4_P9
; CHECK-PWR8: bl __cosf4_P8
; CHECK-NOT: bl __cosf4_massv
; CHECK-ALL: blr
; CHECK-PWR10: __cosf4_P10
; CHECK-PWR9: __cosf4_P9
; CHECK-PWR8: __cosf4_P8
; CHECK-PWR7: __cosf4_P7
; CHECK-NOT: __cosf4_massv
; CHECK-ALL: blr
;
%1 = call <4 x float> @__cosf4_P8(<4 x float> %opnd)
%1 = call <4 x float> @__cosf4(<4 x float> %opnd)
ret <4 x float> %1
}
; tan
define <2 x double> @tan_f64_massv(<2 x double> %opnd) {
; CHECK-ALL-LABEL: @tan_f64_massv
; CHECK-PWR9: bl __tand2_P9
; CHECK-PWR8: bl __tand2_P8
; CHECK-NOT: bl __tand2_massv
; CHECK-ALL: blr
; CHECK-PWR10: __tand2_P10
; CHECK-PWR9: __tand2_P9
; CHECK-PWR8: __tand2_P8
; CHECK-PWR7: __tand2_P7
; CHECK-NOT: __tand2_massv
; CHECK-ALL: blr
;
%1 = call <2 x double> @__tand2_P8(<2 x double> %opnd)
%1 = call <2 x double> @__tand2(<2 x double> %opnd)
ret <2 x double> %1
}
define <4 x float> @tan_f32_massv(<4 x float> %opnd) {
; CHECK-ALL-LABEL: @tan_f32_massv
; CHECK-PWR9: bl __tanf4_P9
; CHECK-PWR8: bl __tanf4_P8
; CHECK-NOT: bl __tanf4_massv
; CHECK-ALL: blr
; CHECK-PWR10: __tanf4_P10
; CHECK-PWR9: __tanf4_P9
; CHECK-PWR8: __tanf4_P8
; CHECK-PWR7: __tanf4_P7
; CHECK-NOT: __tanf4_massv
; CHECK-ALL: blr
;
%1 = call <4 x float> @__tanf4_P8(<4 x float> %opnd)
%1 = call <4 x float> @__tanf4(<4 x float> %opnd)
ret <4 x float> %1
}
; asin
define <2 x double> @asin_f64_massv(<2 x double> %opnd) {
; CHECK-ALL-LABEL: @asin_f64_massv
; CHECK-PWR9: bl __asind2_P9
; CHECK-PWR8: bl __asind2_P8
; CHECK-NOT: bl __asind2_massv
; CHECK-ALL: blr
; CHECK-PWR10: __asind2_P10
; CHECK-PWR9: __asind2_P9
; CHECK-PWR8: __asind2_P8
; CHECK-PWR7: __asind2_P7
; CHECK-NOT: __asind2_massv
; CHECK-ALL: blr
;
%1 = call <2 x double> @__asind2_P8(<2 x double> %opnd)
%1 = call <2 x double> @__asind2(<2 x double> %opnd)
ret <2 x double> %1
}
define <4 x float> @asin_f32_massv(<4 x float> %opnd) {
; CHECK-ALL-LABEL: @asin_f32_massv
; CHECK-PWR9: bl __asinf4_P9
; CHECK-PWR8: bl __asinf4_P8
; CHECK-NOT: bl __asinf4_massv
; CHECK-ALL: blr
; CHECK-PWR10: __asinf4_P10
; CHECK-PWR9: __asinf4_P9
; CHECK-PWR8: __asinf4_P8
; CHECK-PWR7: __asinf4_P7
; CHECK-NOT: __asinf4_massv
; CHECK-ALL: blr
;
%1 = call <4 x float> @__asinf4_P8(<4 x float> %opnd)
%1 = call <4 x float> @__asinf4(<4 x float> %opnd)
ret <4 x float> %1
}
; acos
define <2 x double> @acos_f64_massv(<2 x double> %opnd) {
; CHECK-ALL-LABEL: @acos_f64_massv
; CHECK-PWR9: bl __acosd2_P9
; CHECK-PWR8: bl __acosd2_P8
; CHECK-NOT: bl __acosd2_massv
; CHECK-ALL: blr
; CHECK-PWR10: __acosd2_P10
; CHECK-PWR9: __acosd2_P9
; CHECK-PWR8: __acosd2_P8
; CHECK-PWR7: __acosd2_P7
; CHECK-NOT: __acosd2_massv
; CHECK-ALL: blr
;
%1 = call <2 x double> @__acosd2_P8(<2 x double> %opnd)
%1 = call <2 x double> @__acosd2(<2 x double> %opnd)
ret <2 x double> %1
}
define <4 x float> @acos_f32_massv(<4 x float> %opnd) {
; CHECK-ALL-LABEL: @acos_f32_massv
; CHECK-PWR9: bl __acosf4_P9
; CHECK-PWR8: bl __acosf4_P8
; CHECK-NOT: bl __acosf4_massv
; CHECK-ALL: blr
; CHECK-PWR10: __acosf4_P10
; CHECK-PWR9: __acosf4_P9
; CHECK-PWR8: __acosf4_P8
; CHECK-PWR7: __acosf4_P7
; CHECK-NOT: __acosf4_massv
; CHECK-ALL: blr
;
%1 = call <4 x float> @__acosf4_P8(<4 x float> %opnd)
%1 = call <4 x float> @__acosf4(<4 x float> %opnd)
ret <4 x float> %1
}
; atan
define <2 x double> @atan_f64_massv(<2 x double> %opnd) {
; CHECK-ALL-LABEL: @atan_f64_massv
; CHECK-PWR9: bl __atand2_P9
; CHECK-PWR8: bl __atand2_P8
; CHECK-NOT: bl __atand2_massv
; CHECK-ALL: blr
; CHECK-PWR10: __atand2_P10
; CHECK-PWR9: __atand2_P9
; CHECK-PWR8: __atand2_P8
; CHECK-PWR7: __atand2_P7
; CHECK-NOT: __atand2_massv
; CHECK-ALL: blr
;
%1 = call <2 x double> @__atand2_P8(<2 x double> %opnd)
%1 = call <2 x double> @__atand2(<2 x double> %opnd)
ret <2 x double> %1
}
define <4 x float> @atan_f32_massv(<4 x float> %opnd) {
; CHECK-ALL-LABEL: @atan_f32_massv
; CHECK-PWR9: bl __atanf4_P9
; CHECK-PWR8: bl __atanf4_P8
; CHECK-NOT: bl __atanf4_massv
; CHECK-ALL: blr
; CHECK-PWR10: __atanf4_P10
; CHECK-PWR9: __atanf4_P9
; CHECK-PWR8: __atanf4_P8
; CHECK-PWR7: __atanf4_P7
; CHECK-NOT: __atanf4_massv
; CHECK-ALL: blr
;
%1 = call <4 x float> @__atanf4_P8(<4 x float> %opnd)
%1 = call <4 x float> @__atanf4(<4 x float> %opnd)
ret <4 x float> %1
}
; atan2
define <2 x double> @atan2_f64_massv(<2 x double> %opnd) {
; CHECK-ALL-LABEL: @atan2_f64_massv
; CHECK-PWR9: bl __atan2d2_P9
; CHECK-PWR8: bl __atan2d2_P8
; CHECK-NOT: bl __atan2d2_massv
; CHECK-ALL: blr
; CHECK-PWR10: __atan2d2_P10
; CHECK-PWR9: __atan2d2_P9
; CHECK-PWR8: __atan2d2_P8
; CHECK-PWR7: __atan2d2_P7
; CHECK-NOT: __atan2d2_massv
; CHECK-ALL: blr
;
%1 = call <2 x double> @__atan2d2_P8(<2 x double> %opnd)
%1 = call <2 x double> @__atan2d2(<2 x double> %opnd)
ret <2 x double> %1
}
define <4 x float> @atan2_f32_massv(<4 x float> %opnd) {
; CHECK-ALL-LABEL: @atan2_f32_massv
; CHECK-PWR9: bl __atan2f4_P9
; CHECK-PWR8: bl __atan2f4_P8
; CHECK-NOT: bl __atan2f4_massv
; CHECK-ALL: blr
; CHECK-PWR10: __atan2f4_P10
; CHECK-PWR9: __atan2f4_P9
; CHECK-PWR8: __atan2f4_P8
; CHECK-PWR7: __atan2f4_P7
; CHECK-NOT: __atan2f4_massv
; CHECK-ALL: blr
;
%1 = call <4 x float> @__atan2f4_P8(<4 x float> %opnd)
%1 = call <4 x float> @__atan2f4(<4 x float> %opnd)
ret <4 x float> %1
}
; sinh
define <2 x double> @sinh_f64_massv(<2 x double> %opnd) {
; CHECK-ALL-LABEL: @sinh_f64_massv
; CHECK-PWR9: bl __sinhd2_P9
; CHECK-PWR8: bl __sinhd2_P8
; CHECK-NOT: bl __sinhd2_massv
; CHECK-ALL: blr
; CHECK-PWR10: __sinhd2_P10
; CHECK-PWR9: __sinhd2_P9
; CHECK-PWR8: __sinhd2_P8
; CHECK-PWR7: __sinhd2_P7
; CHECK-NOT: __sinhd2_massv
; CHECK-ALL: blr
;
%1 = call <2 x double> @__sinhd2_P8(<2 x double> %opnd)
%1 = call <2 x double> @__sinhd2(<2 x double> %opnd)
ret <2 x double> %1
}
define <4 x float> @sinh_f32_massv(<4 x float> %opnd) {
; CHECK-ALL-LABEL: @sinh_f32_massv
; CHECK-PWR9: bl __sinhf4_P9
; CHECK-PWR8: bl __sinhf4_P8
; CHECK-NOT: bl __sinhf4_massv
; CHECK-ALL: blr
; CHECK-PWR10: __sinhf4_P10
; CHECK-PWR9: __sinhf4_P9
; CHECK-PWR8: __sinhf4_P8
; CHECK-PWR7: __sinhf4_P7
; CHECK-NOT: __sinhf4_massv
; CHECK-ALL: blr
;
%1 = call <4 x float> @__sinhf4_P8(<4 x float> %opnd)
%1 = call <4 x float> @__sinhf4(<4 x float> %opnd)
ret <4 x float> %1
}
; cosh
define <2 x double> @cosh_f64_massv(<2 x double> %opnd) {
; CHECK-ALL-LABEL: @cosh_f64_massv
; CHECK-PWR9: bl __coshd2_P9
; CHECK-PWR8: bl __coshd2_P8
; CHECK-NOT: bl __coshd2_massv
; CHECK-ALL: blr
; CHECK-PWR10: __coshd2_P10
; CHECK-PWR9: __coshd2_P9
; CHECK-PWR8: __coshd2_P8
; CHECK-PWR7: __coshd2_P7
; CHECK-NOT: __coshd2_massv
; CHECK-ALL: blr
;
%1 = call <2 x double> @__coshd2_P8(<2 x double> %opnd)
%1 = call <2 x double> @__coshd2(<2 x double> %opnd)
ret <2 x double> %1
}
define <4 x float> @cosh_f32_massv(<4 x float> %opnd) {
; CHECK-ALL-LABEL: @cosh_f32_massv
; CHECK-PWR9: bl __coshf4_P9
; CHECK-PWR8: bl __coshf4_P8
; CHECK-NOT: bl __coshf4_massv
; CHECK-ALL: blr
; CHECK-PWR10: __coshf4_P10
; CHECK-PWR9: __coshf4_P9
; CHECK-PWR8: __coshf4_P8
; CHECK-PWR7: __coshf4_P7
; CHECK-NOT: __coshf4_massv
; CHECK-ALL: blr
;
%1 = call <4 x float> @__coshf4_P8(<4 x float> %opnd)
%1 = call <4 x float> @__coshf4(<4 x float> %opnd)
ret <4 x float> %1
}
; tanh
define <2 x double> @tanh_f64_massv(<2 x double> %opnd) {
; CHECK-ALL-LABEL: @tanh_f64_massv
; CHECK-PWR9: bl __tanhd2_P9
; CHECK-PWR8: bl __tanhd2_P8
; CHECK-NOT: bl __tanhd2_massv
; CHECK-ALL: blr
; CHECK-PWR10: __tanhd2_P10
; CHECK-PWR9: __tanhd2_P9
; CHECK-PWR8: __tanhd2_P8
; CHECK-PWR7: __tanhd2_P7
; CHECK-NOT: __tanhd2_massv
; CHECK-ALL: blr
;
%1 = call <2 x double> @__tanhd2_P8(<2 x double> %opnd)
%1 = call <2 x double> @__tanhd2(<2 x double> %opnd)
ret <2 x double> %1
}
define <4 x float> @tanh_f32_massv(<4 x float> %opnd) {
; CHECK-ALL-LABEL: @tanh_f32_massv
; CHECK-PWR9: bl __tanhf4_P9
; CHECK-PWR8: bl __tanhf4_P8
; CHECK-NOT: bl __tanhf4_massv
; CHECK-ALL: blr
; CHECK-PWR10: __tanhf4_P10
; CHECK-PWR9: __tanhf4_P9
; CHECK-PWR8: __tanhf4_P8
; CHECK-PWR7: __tanhf4_P7
; CHECK-NOT: __tanhf4_massv
; CHECK-ALL: blr
;
%1 = call <4 x float> @__tanhf4_P8(<4 x float> %opnd)
%1 = call <4 x float> @__tanhf4(<4 x float> %opnd)
ret <4 x float> %1
}
; asinh
define <2 x double> @asinh_f64_massv(<2 x double> %opnd) {
; CHECK-ALL-LABEL: @asinh_f64_massv
; CHECK-PWR9: bl __asinhd2_P9
; CHECK-PWR8: bl __asinhd2_P8
; CHECK-NOT: bl __asinhd2_massv
; CHECK-ALL: blr
; CHECK-PWR10: __asinhd2_P10
; CHECK-PWR9: __asinhd2_P9
; CHECK-PWR8: __asinhd2_P8
; CHECK-PWR7: __asinhd2_P7
; CHECK-NOT: __asinhd2_massv
; CHECK-ALL: blr
;
%1 = call <2 x double> @__asinhd2_P8(<2 x double> %opnd)
%1 = call <2 x double> @__asinhd2(<2 x double> %opnd)
ret <2 x double> %1
}
define <4 x float> @asinh_f32_massv(<4 x float> %opnd) {
; CHECK-ALL-LABEL: @asinh_f32_massv
; CHECK-PWR9: bl __asinhf4_P9
; CHECK-PWR8: bl __asinhf4_P8
; CHECK-NOT: bl __asinhf4_massv
; CHECK-ALL: blr
; CHECK-PWR10: __asinhf4_P10
; CHECK-PWR9: __asinhf4_P9
; CHECK-PWR8: __asinhf4_P8
; CHECK-PWR7: __asinhf4_P7
; CHECK-NOT: __asinhf4_massv
; CHECK-ALL: blr
;
%1 = call <4 x float> @__asinhf4_P8(<4 x float> %opnd)
%1 = call <4 x float> @__asinhf4(<4 x float> %opnd)
ret <4 x float> %1
}
; acosh
define <2 x double> @acosh_f64_massv(<2 x double> %opnd) {
; CHECK-ALL-LABEL: @acosh_f64_massv
; CHECK-PWR9: bl __acoshd2_P9
; CHECK-PWR8: bl __acoshd2_P8
; CHECK-NOT: bl __acoshd2_massv
; CHECK-ALL: blr
; CHECK-PWR10: __acoshd2_P10
; CHECK-PWR9: __acoshd2_P9
; CHECK-PWR8: __acoshd2_P8
; CHECK-PWR7: __acoshd2_P7
; CHECK-NOT: __acoshd2_massv
; CHECK-ALL: blr
;
%1 = call <2 x double> @__acoshd2_P8(<2 x double> %opnd)
%1 = call <2 x double> @__acoshd2(<2 x double> %opnd)
ret <2 x double> %1
}
define <4 x float> @acosh_f32_massv(<4 x float> %opnd) {
; CHECK-ALL-LABEL: @acosh_f32_massv
; CHECK-PWR9: bl __acoshf4_P9
; CHECK-PWR8: bl __acoshf4_P8
; CHECK-NOT: bl __acoshf4_massv
; CHECK-ALL: blr
; CHECK-PWR10: __acoshf4_P10
; CHECK-PWR9: __acoshf4_P9
; CHECK-PWR8: __acoshf4_P8
; CHECK-PWR7: __acoshf4_P7
; CHECK-NOT: __acoshf4_massv
; CHECK-ALL: blr
;
%1 = call <4 x float> @__acoshf4_P8(<4 x float> %opnd)
%1 = call <4 x float> @__acoshf4(<4 x float> %opnd)
ret <4 x float> %1
}
; atanh
define <2 x double> @atanh_f64_massv(<2 x double> %opnd) {
; CHECK-ALL-LABEL: @atanh_f64_massv
; CHECK-PWR9: bl __atanhd2_P9
; CHECK-PWR8: bl __atanhd2_P8
; CHECK-NOT: bl __atanhd2_massv
; CHECK-ALL: blr
; CHECK-PWR10: __atanhd2_P10
; CHECK-PWR9: __atanhd2_P9
; CHECK-PWR8: __atanhd2_P8
; CHECK-PWR7: __atanhd2_P7
; CHECK-NOT: __atanhd2_massv
; CHECK-ALL: blr
;
%1 = call <2 x double> @__atanhd2_P8(<2 x double> %opnd)
%1 = call <2 x double> @__atanhd2(<2 x double> %opnd)
ret <2 x double> %1
}
define <4 x float> @atanh_f32_massv(<4 x float> %opnd) {
; CHECK-ALL-LABEL: @atanh_f32_massv
; CHECK-PWR9: bl __atanhf4_P9
; CHECK-PWR8: bl __atanhf4_P8
; CHECK-NOT: bl __atanhf4_massv
; CHECK-ALL: blr
; CHECK-PWR10: __atanhf4_P10
; CHECK-PWR9: __atanhf4_P9
; CHECK-PWR8: __atanhf4_P8
; CHECK-PWR7: __atanhf4_P7
; CHECK-NOT: __atanhf4_massv
; CHECK-ALL: blr
;
%1 = call <4 x float> @__atanhf4_P8(<4 x float> %opnd)
%1 = call <4 x float> @__atanhf4(<4 x float> %opnd)
ret <4 x float> %1
}

View File

@ -1,11 +1,18 @@
; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr10 | FileCheck -check-prefixes=CHECK-PWR9 %s
; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr9 | FileCheck -check-prefixes=CHECK-PWR9 %s
; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr8 | FileCheck -check-prefixes=CHECK-PWR8 %s
; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc-ibm-aix-xcoff -mcpu=pwr10 | FileCheck -check-prefixes=CHECK-PWR10 %s
; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc-ibm-aix-xcoff -mcpu=pwr9 | FileCheck -check-prefixes=CHECK-PWR9 %s
; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc-ibm-aix-xcoff -mcpu=pwr8 | FileCheck -check-prefixes=CHECK-PWR8 %s
; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc-ibm-aix-xcoff -mcpu=pwr7 | FileCheck -check-prefixes=CHECK-PWR7 %s
; Exponent is a variable
define void @vpow_var(double* nocapture %z, double* nocapture readonly %y, double* nocapture readonly %x) {
; CHECK-LABEL: @vpow_var
; CHECK-PWR9: bl __powd2_P9
; CHECK-PWR8: bl __powd2_P8
; CHECK-PWR10: __powd2_P10
; CHECK-PWR9: __powd2_P9
; CHECK-PWR8: __powd2_P8
; CHECK-PWR7: __powd2_P7
; CHECK: blr
entry:
br label %vector.body
@ -19,7 +26,7 @@ vector.body:
%wide.load = load <2 x double>, <2 x double>* %0, align 8
%1 = bitcast double* %next.gep31 to <2 x double>*
%wide.load33 = load <2 x double>, <2 x double>* %1, align 8
%2 = call ninf afn nsz <2 x double> @__powd2_P8(<2 x double> %wide.load, <2 x double> %wide.load33)
%2 = call ninf afn nsz <2 x double> @__powd2(<2 x double> %wide.load, <2 x double> %wide.load33)
%3 = bitcast double* %next.gep to <2 x double>*
store <2 x double> %2, <2 x double>* %3, align 8
%index.next = add i64 %index, 2
@ -33,8 +40,10 @@ for.end:
; Exponent is a constant != 0.75 and !=0.25
define void @vpow_const(double* nocapture %y, double* nocapture readonly %x) {
; CHECK-LABEL: @vpow_const
; CHECK-PWR9: bl __powd2_P9
; CHECK-PWR8: bl __powd2_P8
; CHECK-PWR10: __powd2_P10
; CHECK-PWR9: __powd2_P9
; CHECK-PWR8: __powd2_P8
; CHECK-PWR7: __powd2_P7
; CHECK: blr
entry:
br label %vector.body
@ -45,7 +54,7 @@ vector.body:
%next.gep19 = getelementptr double, double* %x, i64 %index
%0 = bitcast double* %next.gep19 to <2 x double>*
%wide.load = load <2 x double>, <2 x double>* %0, align 8
%1 = call ninf afn nsz <2 x double> @__powd2_P8(<2 x double> %wide.load, <2 x double> <double 7.600000e-01, double 7.600000e-01>)
%1 = call ninf afn nsz <2 x double> @__powd2(<2 x double> %wide.load, <2 x double> <double 7.600000e-01, double 7.600000e-01>)
%2 = bitcast double* %next.gep to <2 x double>*
store <2 x double> %1, <2 x double>* %2, align 8
%index.next = add i64 %index, 2
@ -59,8 +68,10 @@ for.end:
; Exponent is a constant != 0.75 and !=0.25 and they are different
define void @vpow_noeq_const(double* nocapture %y, double* nocapture readonly %x) {
; CHECK-LABEL: @vpow_noeq_const
; CHECK-PWR9: bl __powd2_P9
; CHECK-PWR8: bl __powd2_P8
; CHECK-PWR10: __powd2_P10
; CHECK-PWR9: __powd2_P9
; CHECK-PWR8: __powd2_P8
; CHECK-PWR7: __powd2_P7
; CHECK: blr
entry:
br label %vector.body
@ -71,7 +82,7 @@ vector.body:
%next.gep19 = getelementptr double, double* %x, i64 %index
%0 = bitcast double* %next.gep19 to <2 x double>*
%wide.load = load <2 x double>, <2 x double>* %0, align 8
%1 = call ninf afn nsz <2 x double> @__powd2_P8(<2 x double> %wide.load, <2 x double> <double 7.700000e-01, double 7.600000e-01>)
%1 = call ninf afn nsz <2 x double> @__powd2(<2 x double> %wide.load, <2 x double> <double 7.700000e-01, double 7.600000e-01>)
%2 = bitcast double* %next.gep to <2 x double>*
store <2 x double> %1, <2 x double>* %2, align 8
%index.next = add i64 %index, 2
@ -85,8 +96,10 @@ for.end:
; Exponent lanes are different constants; one lane is 0.75, but the vector is not a 0.75 splat
define void @vpow_noeq075_const(double* nocapture %y, double* nocapture readonly %x) {
; CHECK-LABEL: @vpow_noeq075_const
; CHECK-PWR9: bl __powd2_P9
; CHECK-PWR8: bl __powd2_P8
; CHECK-PWR10: __powd2_P10
; CHECK-PWR9: __powd2_P9
; CHECK-PWR8: __powd2_P8
; CHECK-PWR7: __powd2_P7
; CHECK: blr
entry:
br label %vector.body
@ -97,7 +110,7 @@ vector.body:
%next.gep19 = getelementptr double, double* %x, i64 %index
%0 = bitcast double* %next.gep19 to <2 x double>*
%wide.load = load <2 x double>, <2 x double>* %0, align 8
%1 = call ninf afn nsz <2 x double> @__powd2_P8(<2 x double> %wide.load, <2 x double> <double 7.700000e-01, double 7.500000e-01>)
%1 = call ninf afn nsz <2 x double> @__powd2(<2 x double> %wide.load, <2 x double> <double 7.700000e-01, double 7.500000e-01>)
%2 = bitcast double* %next.gep to <2 x double>*
store <2 x double> %1, <2 x double>* %2, align 8
%index.next = add i64 %index, 2
@ -111,8 +124,10 @@ for.end:
; Exponent lanes are different constants; one lane is 0.25, but the vector is not a 0.25 splat
define void @vpow_noeq025_const(double* nocapture %y, double* nocapture readonly %x) {
; CHECK-LABEL: @vpow_noeq025_const
; CHECK-PWR9: bl __powd2_P9
; CHECK-PWR8: bl __powd2_P8
; CHECK-PWR10: __powd2_P10
; CHECK-PWR9: __powd2_P9
; CHECK-PWR8: __powd2_P8
; CHECK-PWR7: __powd2_P7
; CHECK: blr
entry:
br label %vector.body
@ -123,7 +138,7 @@ vector.body:
%next.gep19 = getelementptr double, double* %x, i64 %index
%0 = bitcast double* %next.gep19 to <2 x double>*
%wide.load = load <2 x double>, <2 x double>* %0, align 8
%1 = call ninf afn nsz <2 x double> @__powd2_P8(<2 x double> %wide.load, <2 x double> <double 7.700000e-01, double 2.500000e-01>)
%1 = call ninf afn nsz <2 x double> @__powd2(<2 x double> %wide.load, <2 x double> <double 7.700000e-01, double 2.500000e-01>)
%2 = bitcast double* %next.gep to <2 x double>*
store <2 x double> %1, <2 x double>* %2, align 8
%index.next = add i64 %index, 2
@ -137,7 +152,7 @@ for.end:
; Exponent is 0.75
define void @vpow_075(double* nocapture %y, double* nocapture readonly %x) {
; CHECK-LABEL: @vpow_075
; CHECK-NOT: bl __powd2_P{{[8,9]}}
; CHECK-NOT: __powd2_P{{(7|8|9|10)}}
; CHECK: xvrsqrtesp
; CHECK: blr
entry:
@ -149,7 +164,7 @@ vector.body:
%next.gep19 = getelementptr double, double* %x, i64 %index
%0 = bitcast double* %next.gep19 to <2 x double>*
%wide.load = load <2 x double>, <2 x double>* %0, align 8
%1 = call ninf afn <2 x double> @__powd2_P8(<2 x double> %wide.load, <2 x double> <double 7.500000e-01, double 7.500000e-01>)
%1 = call ninf afn <2 x double> @__powd2(<2 x double> %wide.load, <2 x double> <double 7.500000e-01, double 7.500000e-01>)
%2 = bitcast double* %next.gep to <2 x double>*
store <2 x double> %1, <2 x double>* %2, align 8
%index.next = add i64 %index, 2
@ -163,7 +178,7 @@ for.end:
; Exponent is 0.25
define void @vpow_025(double* nocapture %y, double* nocapture readonly %x) {
; CHECK-LABEL: @vpow_025
; CHECK-NOT: bl __powd2_P{{[8,9]}}
; CHECK-NOT: __powd2_P{{(7|8|9|10)}}
; CHECK: xvrsqrtesp
; CHECK: blr
entry:
@ -175,7 +190,7 @@ vector.body:
%next.gep19 = getelementptr double, double* %x, i64 %index
%0 = bitcast double* %next.gep19 to <2 x double>*
%wide.load = load <2 x double>, <2 x double>* %0, align 8
%1 = call ninf afn nsz <2 x double> @__powd2_P8(<2 x double> %wide.load, <2 x double> <double 2.500000e-01, double 2.500000e-01>)
%1 = call ninf afn nsz <2 x double> @__powd2(<2 x double> %wide.load, <2 x double> <double 2.500000e-01, double 2.500000e-01>)
%2 = bitcast double* %next.gep to <2 x double>*
store <2 x double> %1, <2 x double>* %2, align 8
%index.next = add i64 %index, 2
@ -189,8 +204,10 @@ for.end:
; Exponent is 0.75 but no proper fast-math flags
define void @vpow_075_nofast(double* nocapture %y, double* nocapture readonly %x) {
; CHECK-LABEL: @vpow_075_nofast
; CHECK-PWR9: bl __powd2_P9
; CHECK-PWR8: bl __powd2_P8
; CHECK-PWR10: __powd2_P10
; CHECK-PWR9: __powd2_P9
; CHECK-PWR8: __powd2_P8
; CHECK-PWR7: __powd2_P7
; CHECK-NOT: xvrsqrtesp
; CHECK: blr
entry:
@ -202,7 +219,7 @@ vector.body:
%next.gep19 = getelementptr double, double* %x, i64 %index
%0 = bitcast double* %next.gep19 to <2 x double>*
%wide.load = load <2 x double>, <2 x double>* %0, align 8
%1 = call <2 x double> @__powd2_P8(<2 x double> %wide.load, <2 x double> <double 7.500000e-01, double 7.500000e-01>)
%1 = call <2 x double> @__powd2(<2 x double> %wide.load, <2 x double> <double 7.500000e-01, double 7.500000e-01>)
%2 = bitcast double* %next.gep to <2 x double>*
store <2 x double> %1, <2 x double>* %2, align 8
%index.next = add i64 %index, 2
@ -216,8 +233,10 @@ for.end:
; Exponent is 0.25 but no proper fast-math flags
define void @vpow_025_nofast(double* nocapture %y, double* nocapture readonly %x) {
; CHECK-LABEL: @vpow_025_nofast
; CHECK-PWR9: bl __powd2_P9
; CHECK-PWR8: bl __powd2_P8
; CHECK-PWR10: __powd2_P10
; CHECK-PWR9: __powd2_P9
; CHECK-PWR8: __powd2_P8
; CHECK-PWR7: __powd2_P7
; CHECK-NOT: xvrsqrtesp
; CHECK: blr
entry:
@ -229,7 +248,7 @@ vector.body:
%next.gep19 = getelementptr double, double* %x, i64 %index
%0 = bitcast double* %next.gep19 to <2 x double>*
%wide.load = load <2 x double>, <2 x double>* %0, align 8
%1 = call <2 x double> @__powd2_P8(<2 x double> %wide.load, <2 x double> <double 2.500000e-01, double 2.500000e-01>)
%1 = call <2 x double> @__powd2(<2 x double> %wide.load, <2 x double> <double 2.500000e-01, double 2.500000e-01>)
%2 = bitcast double* %next.gep to <2 x double>*
store <2 x double> %1, <2 x double>* %2, align 8
%index.next = add i64 %index, 2
@ -241,4 +260,4 @@ for.end:
}
; Function Attrs: nounwind readnone speculatable willreturn
declare <2 x double> @__powd2_P8(<2 x double>, <2 x double>) #1
declare <2 x double> @__powd2(<2 x double>, <2 x double>) #1

View File

@ -1,11 +1,18 @@
; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr10 | FileCheck -check-prefixes=CHECK-PWR9 %s
; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr9 | FileCheck -check-prefixes=CHECK-PWR9 %s
; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr8 | FileCheck -check-prefixes=CHECK-PWR8 %s
; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc-ibm-aix-xcoff -mcpu=pwr10 | FileCheck -check-prefixes=CHECK-PWR10 %s
; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc-ibm-aix-xcoff -mcpu=pwr9 | FileCheck -check-prefixes=CHECK-PWR9 %s
; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc-ibm-aix-xcoff -mcpu=pwr8 | FileCheck -check-prefixes=CHECK-PWR8 %s
; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc-ibm-aix-xcoff -mcpu=pwr7 | FileCheck -check-prefixes=CHECK-PWR7 %s
; Exponent is a variable
define void @vspow_var(float* nocapture %z, float* nocapture readonly %y, float* nocapture readonly %x) {
; CHECK-LABEL: @vspow_var
; CHECK-PWR9: bl __powf4_P9
; CHECK-PWR8: bl __powf4_P8
; CHECK-PWR10: __powf4_P10
; CHECK-PWR9: __powf4_P9
; CHECK-PWR8: __powf4_P8
; CHECK-PWR7: __powf4_P7
; CHECK: blr
entry:
br label %vector.body
@ -19,7 +26,7 @@ vector.body:
%wide.load = load <4 x float>, <4 x float>* %0, align 4
%1 = bitcast float* %next.gep31 to <4 x float>*
%wide.load33 = load <4 x float>, <4 x float>* %1, align 4
%2 = call ninf afn nsz <4 x float> @__powf4_P8(<4 x float> %wide.load, <4 x float> %wide.load33)
%2 = call ninf afn nsz <4 x float> @__powf4(<4 x float> %wide.load, <4 x float> %wide.load33)
%3 = bitcast float* %next.gep to <4 x float>*
store <4 x float> %2, <4 x float>* %3, align 4
%index.next = add i64 %index, 4
@ -33,8 +40,10 @@ for.end:
; Exponent is a constant != 0.75 and !=0.25
define void @vspow_const(float* nocapture %y, float* nocapture readonly %x) {
; CHECK-LABEL: @vspow_const
; CHECK-PWR9: bl __powf4_P9
; CHECK-PWR8: bl __powf4_P8
; CHECK-PWR10: __powf4_P10
; CHECK-PWR9: __powf4_P9
; CHECK-PWR8: __powf4_P8
; CHECK-PWR7: __powf4_P7
; CHECK: blr
entry:
br label %vector.body
@ -45,7 +54,7 @@ vector.body:
%next.gep19 = getelementptr float, float* %x, i64 %index
%0 = bitcast float* %next.gep19 to <4 x float>*
%wide.load = load <4 x float>, <4 x float>* %0, align 4
%1 = call ninf afn nsz <4 x float> @__powf4_P8(<4 x float> %wide.load, <4 x float> <float 0x3FE851EB80000000, float 0x3FE851EB80000000, float 0x3FE851EB80000000, float 0x3FE851EB80000000>)
%1 = call ninf afn nsz <4 x float> @__powf4(<4 x float> %wide.load, <4 x float> <float 0x3FE851EB80000000, float 0x3FE851EB80000000, float 0x3FE851EB80000000, float 0x3FE851EB80000000>)
%2 = bitcast float* %next.gep to <4 x float>*
store <4 x float> %1, <4 x float>* %2, align 4
%index.next = add i64 %index, 4
@ -59,8 +68,10 @@ for.end:
; Exponent is a constant != 0.75 and !=0.25 and they are different
define void @vspow_neq_const(float* nocapture %y, float* nocapture readonly %x) {
; CHECK-LABEL: @vspow_neq_const
; CHECK-PWR9: bl __powf4_P9
; CHECK-PWR8: bl __powf4_P8
; CHECK-PWR10: __powf4_P10
; CHECK-PWR9: __powf4_P9
; CHECK-PWR8: __powf4_P8
; CHECK-PWR7: __powf4_P7
; CHECK: blr
entry:
br label %vector.body
@ -71,7 +82,7 @@ vector.body:
%next.gep19 = getelementptr float, float* %x, i64 %index
%0 = bitcast float* %next.gep19 to <4 x float>*
%wide.load = load <4 x float>, <4 x float>* %0, align 4
%1 = call ninf afn nsz <4 x float> @__powf4_P8(<4 x float> %wide.load, <4 x float> <float 0x3FE861EB80000000, float 0x3FE871EB80000000, float 0x3FE851EB80000000, float 0x3FE851EB80000000>)
%1 = call ninf afn nsz <4 x float> @__powf4(<4 x float> %wide.load, <4 x float> <float 0x3FE861EB80000000, float 0x3FE871EB80000000, float 0x3FE851EB80000000, float 0x3FE851EB80000000>)
%2 = bitcast float* %next.gep to <4 x float>*
store <4 x float> %1, <4 x float>* %2, align 4
%index.next = add i64 %index, 4
@ -85,8 +96,10 @@ for.end:
; Exponent is a non-splat constant: three lanes are 0.75 and one lane differs
define void @vspow_neq075_const(float* nocapture %y, float* nocapture readonly %x) {
; CHECK-LABEL: @vspow_neq075_const
; CHECK-PWR9: bl __powf4_P9
; CHECK-PWR8: bl __powf4_P8
; CHECK-PWR10: __powf4_P10
; CHECK-PWR9: __powf4_P9
; CHECK-PWR8: __powf4_P8
; CHECK-PWR7: __powf4_P7
; CHECK: blr
entry:
br label %vector.body
@ -97,7 +110,7 @@ vector.body:
%next.gep19 = getelementptr float, float* %x, i64 %index
%0 = bitcast float* %next.gep19 to <4 x float>*
%wide.load = load <4 x float>, <4 x float>* %0, align 4
%1 = call ninf afn nsz <4 x float> @__powf4_P8(<4 x float> %wide.load, <4 x float> <float 7.500000e-01, float 7.500000e-01, float 7.500000e-01, float 0x3FE851EB80000000>)
%1 = call ninf afn nsz <4 x float> @__powf4(<4 x float> %wide.load, <4 x float> <float 7.500000e-01, float 7.500000e-01, float 7.500000e-01, float 0x3FE851EB80000000>)
%2 = bitcast float* %next.gep to <4 x float>*
store <4 x float> %1, <4 x float>* %2, align 4
%index.next = add i64 %index, 4
@ -111,8 +124,10 @@ for.end:
; Exponent is a non-splat constant: lanes alternate between 0.25 and another value
define void @vspow_neq025_const(float* nocapture %y, float* nocapture readonly %x) {
; CHECK-LABEL: @vspow_neq025_const
; CHECK-PWR9: bl __powf4_P9
; CHECK-PWR8: bl __powf4_P8
; CHECK-PWR10: __powf4_P10
; CHECK-PWR9: __powf4_P9
; CHECK-PWR8: __powf4_P8
; CHECK-PWR7: __powf4_P7
; CHECK: blr
entry:
br label %vector.body
@ -123,7 +138,7 @@ vector.body:
%next.gep19 = getelementptr float, float* %x, i64 %index
%0 = bitcast float* %next.gep19 to <4 x float>*
%wide.load = load <4 x float>, <4 x float>* %0, align 4
%1 = call ninf afn nsz <4 x float> @__powf4_P8(<4 x float> %wide.load, <4 x float> <float 0x3FE851EB80000000, float 2.500000e-01, float 0x3FE851EB80000000, float 2.500000e-01>)
%1 = call ninf afn nsz <4 x float> @__powf4(<4 x float> %wide.load, <4 x float> <float 0x3FE851EB80000000, float 2.500000e-01, float 0x3FE851EB80000000, float 2.500000e-01>)
%2 = bitcast float* %next.gep to <4 x float>*
store <4 x float> %1, <4 x float>* %2, align 4
%index.next = add i64 %index, 4
@ -137,7 +152,7 @@ for.end:
; Exponent is 0.75
define void @vspow_075(float* nocapture %y, float* nocapture readonly %x) {
; CHECK-LABEL: @vspow_075
; CHECK-NOT: bl __powf4_P{{[8,9]}}
; CHECK-NOT: __powf4_P{{(7|8|9|10)}}
; CHECK: xvrsqrtesp
; CHECK: blr
entry:
@ -149,7 +164,7 @@ vector.body:
%next.gep19 = getelementptr float, float* %x, i64 %index
%0 = bitcast float* %next.gep19 to <4 x float>*
%wide.load = load <4 x float>, <4 x float>* %0, align 4
%1 = call ninf afn <4 x float> @__powf4_P8(<4 x float> %wide.load, <4 x float> <float 7.500000e-01, float 7.500000e-01, float 7.500000e-01, float 7.500000e-01>)
%1 = call ninf afn <4 x float> @__powf4(<4 x float> %wide.load, <4 x float> <float 7.500000e-01, float 7.500000e-01, float 7.500000e-01, float 7.500000e-01>)
%2 = bitcast float* %next.gep to <4 x float>*
store <4 x float> %1, <4 x float>* %2, align 4
%index.next = add i64 %index, 4
@ -163,7 +178,7 @@ for.end:
; Exponent is 0.25
define void @vspow_025(float* nocapture %y, float* nocapture readonly %x) {
; CHECK-LABEL: @vspow_025
; CHECK-NOT: bl __powf4_P{{[8,9]}}
; CHECK-NOT: __powf4_P{{(7|8|9|10)}}
; CHECK: xvrsqrtesp
; CHECK: blr
entry:
@ -175,7 +190,7 @@ vector.body:
%next.gep19 = getelementptr float, float* %x, i64 %index
%0 = bitcast float* %next.gep19 to <4 x float>*
%wide.load = load <4 x float>, <4 x float>* %0, align 4
%1 = call ninf afn nsz <4 x float> @__powf4_P8(<4 x float> %wide.load, <4 x float> <float 2.500000e-01, float 2.500000e-01, float 2.500000e-01, float 2.500000e-01>)
%1 = call ninf afn nsz <4 x float> @__powf4(<4 x float> %wide.load, <4 x float> <float 2.500000e-01, float 2.500000e-01, float 2.500000e-01, float 2.500000e-01>)
%2 = bitcast float* %next.gep to <4 x float>*
store <4 x float> %1, <4 x float>* %2, align 4
%index.next = add i64 %index, 4
@ -189,8 +204,10 @@ for.end:
; Exponent is 0.75 but no proper fast-math flags
define void @vspow_075_nofast(float* nocapture %y, float* nocapture readonly %x) {
; CHECK-LABEL: @vspow_075_nofast
; CHECK-PWR9: bl __powf4_P9
; CHECK-PWR8: bl __powf4_P8
; CHECK-PWR10: __powf4_P10
; CHECK-PWR9: __powf4_P9
; CHECK-PWR8: __powf4_P8
; CHECK-PWR7: __powf4_P7
; CHECK-NOT: xvrsqrtesp
; CHECK: blr
entry:
@ -202,7 +219,7 @@ vector.body:
%next.gep19 = getelementptr float, float* %x, i64 %index
%0 = bitcast float* %next.gep19 to <4 x float>*
%wide.load = load <4 x float>, <4 x float>* %0, align 4
%1 = call <4 x float> @__powf4_P8(<4 x float> %wide.load, <4 x float> <float 7.500000e-01, float 7.500000e-01, float 7.500000e-01, float 7.500000e-01>)
%1 = call <4 x float> @__powf4(<4 x float> %wide.load, <4 x float> <float 7.500000e-01, float 7.500000e-01, float 7.500000e-01, float 7.500000e-01>)
%2 = bitcast float* %next.gep to <4 x float>*
store <4 x float> %1, <4 x float>* %2, align 4
%index.next = add i64 %index, 4
@ -216,8 +233,10 @@ for.end:
; Exponent is 0.25 but no proper fast-math flags
define void @vspow_025_nofast(float* nocapture %y, float* nocapture readonly %x) {
; CHECK-LABEL: @vspow_025_nofast
; CHECK-PWR9: bl __powf4_P9
; CHECK-PWR8: bl __powf4_P8
; CHECK-PWR10: __powf4_P10
; CHECK-PWR9: __powf4_P9
; CHECK-PWR8: __powf4_P8
; CHECK-PWR7: __powf4_P7
; CHECK-NOT: xvrsqrtesp
; CHECK: blr
entry:
@ -229,7 +248,7 @@ vector.body:
%next.gep19 = getelementptr float, float* %x, i64 %index
%0 = bitcast float* %next.gep19 to <4 x float>*
%wide.load = load <4 x float>, <4 x float>* %0, align 4
%1 = call <4 x float> @__powf4_P8(<4 x float> %wide.load, <4 x float> <float 2.500000e-01, float 2.500000e-01, float 2.500000e-01, float 2.500000e-01>)
%1 = call <4 x float> @__powf4(<4 x float> %wide.load, <4 x float> <float 2.500000e-01, float 2.500000e-01, float 2.500000e-01, float 2.500000e-01>)
%2 = bitcast float* %next.gep to <4 x float>*
store <4 x float> %1, <4 x float>* %2, align 4
%index.next = add i64 %index, 4
@ -241,4 +260,4 @@ for.end:
}
; Function Attrs: nounwind readnone speculatable willreturn
declare <4 x float> @__powf4_P8(<4 x float>, <4 x float>)
declare <4 x float> @__powf4(<4 x float>, <4 x float>)

View File

@ -13,7 +13,7 @@ declare float @atanhf(float) #0
; Check that massv entries are not generated.
define void @cbrt_f64(double* nocapture %varray) {
; CHECK-LABEL: @cbrt_f64(
; CHECK-NOT: __cbrtd2_P8{{.*}}<2 x double>
; CHECK-NOT: __cbrtd2{{.*}}<2 x double>
; CHECK: ret void
;
entry:
@ -36,7 +36,7 @@ for.end:
define void @cbrt_f32(float* nocapture %varray) {
; CHECK-LABEL: @cbrt_f32(
; CHECK-NOT: __cbrtf4_P8{{.*}}<4 x float>
; CHECK-NOT: __cbrtf4{{.*}}<4 x float>
; CHECK: ret void
;
entry:
@ -59,7 +59,7 @@ for.end:
define void @atanh_f64(double* nocapture %varray) {
; CHECK-LABEL: @atanh_f64(
; CHECK-NOT: __atanhd2_P8{{.*}}<2 x double>
; CHECK-NOT: __atanhd2{{.*}}<2 x double>
; CHECK: ret void
;
entry:
@ -82,7 +82,7 @@ for.end:
define void @atanh_f32(float* nocapture %varray) {
; CHECK-LABEL: @atanh_f32(
; CHECK-NOT: __atanhf4_P8{{.*}}<2 x double>
; CHECK-NOT: __atanhf4{{.*}}<4 x float>
; CHECK: ret void
;
entry:

View File

@ -1,7 +1,5 @@
; RUN: opt -vector-library=MASSV -inject-tli-mappings -loop-vectorize -force-vector-interleave=1 -S < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-unknown-linux-gnu"
; RUN: opt -vector-library=MASSV -mtriple=powerpc64le-unknown-linux-gnu -inject-tli-mappings -loop-vectorize -force-vector-interleave=1 -S < %s | FileCheck %s
; RUN: opt -vector-library=MASSV -vec-extabi -mattr=+altivec -mtriple=powerpc64-ibm-aix-xcoff -inject-tli-mappings -loop-vectorize -force-vector-interleave=1 -S < %s | FileCheck %s
declare double @cbrt(double) #0
declare float @cbrtf(float) #0
@ -90,7 +88,7 @@ declare float @atanhf(float) #0
define void @cbrt_f64(double* nocapture %varray) {
; CHECK-LABEL: @cbrt_f64(
; CHECK: __cbrtd2_P8{{.*}}<2 x double>
; CHECK: __cbrtd2{{.*}}<2 x double>
; CHECK: ret void
;
entry:
@ -113,7 +111,7 @@ for.end:
define void @cbrt_f32(float* nocapture %varray) {
; CHECK-LABEL: @cbrt_f32(
; CHECK: __cbrtf4_P8{{.*}}<4 x float>
; CHECK: __cbrtf4{{.*}}<4 x float>
; CHECK: ret void
;
entry:
@ -136,7 +134,7 @@ for.end:
define void @pow_f64(double* nocapture %varray, double* nocapture readonly %exp) {
; CHECK-LABEL: @pow_f64(
; CHECK: __powd2_P8{{.*}}<2 x double>
; CHECK: __powd2{{.*}}<2 x double>
; CHECK: ret void
;
entry:
@ -161,7 +159,7 @@ for.end:
define void @pow_f64_intrinsic(double* nocapture %varray, double* nocapture readonly %exp) {
; CHECK-LABEL: @pow_f64_intrinsic(
; CHECK: __powd2_P8{{.*}}<2 x double>
; CHECK: __powd2{{.*}}<2 x double>
; CHECK: ret void
;
entry:
@ -186,7 +184,7 @@ for.end:
define void @pow_f32(float* nocapture %varray, float* nocapture readonly %exp) {
; CHECK-LABEL: @pow_f32(
; CHECK: __powf4_P8{{.*}}<4 x float>
; CHECK: __powf4{{.*}}<4 x float>
; CHECK: ret void
;
entry:
@ -211,7 +209,7 @@ for.end:
define void @pow_f32_intrinsic(float* nocapture %varray, float* nocapture readonly %exp) {
; CHECK-LABEL: @pow_f32_intrinsic(
; CHECK: __powf4_P8{{.*}}<4 x float>
; CHECK: __powf4{{.*}}<4 x float>
; CHECK: ret void
;
entry:
@ -236,7 +234,7 @@ for.end:
define void @sqrt_f64(double* nocapture %varray) {
; CHECK-LABEL: @sqrt_f64(
; CHECK-NOT: __sqrtd2_P8{{.*}}<2 x double>
; CHECK-NOT: __sqrtd2{{.*}}<2 x double>
; CHECK: ret void
;
entry:
@ -259,7 +257,7 @@ for.end:
define void @sqrt_f32(float* nocapture %varray) {
; CHECK-LABEL: @sqrt_f32(
; CHECK-NOT: __sqrtf4_P8{{.*}}<4 x float>
; CHECK-NOT: __sqrtf4{{.*}}<4 x float>
; CHECK: ret void
;
entry:
@ -282,7 +280,7 @@ for.end:
define void @exp_f64(double* nocapture %varray) {
; CHECK-LABEL: @exp_f64(
; CHECK: __expd2_P8{{.*}}<2 x double>
; CHECK: __expd2{{.*}}<2 x double>
; CHECK: ret void
;
entry:
@ -305,7 +303,7 @@ for.end:
define void @exp_f64_intrinsic(double* nocapture %varray) {
; CHECK-LABEL: @exp_f64_intrinsic(
; CHECK: __expd2_P8{{.*}}<2 x double>
; CHECK: __expd2{{.*}}<2 x double>
; CHECK: ret void
;
entry:
@ -328,7 +326,7 @@ for.end:
define void @exp_f32(float* nocapture %varray) {
; CHECK-LABEL: @exp_f32(
; CHECK: __expf4_P8{{.*}}<4 x float>
; CHECK: __expf4{{.*}}<4 x float>
; CHECK: ret void
;
entry:
@ -351,7 +349,7 @@ for.end:
define void @exp_f32_intrinsic(float* nocapture %varray) {
; CHECK-LABEL: @exp_f32_intrinsic(
; CHECK: __expf4_P8{{.*}}<4 x float>
; CHECK: __expf4{{.*}}<4 x float>
; CHECK: ret void
;
entry:
@ -374,7 +372,7 @@ for.end:
define void @exp2_f64(double* nocapture %varray) {
; CHECK-LABEL: @exp2_f64(
; CHECK: __exp2d2_P8{{.*}}<2 x double>
; CHECK: __exp2d2{{.*}}<2 x double>
; CHECK: ret void
;
entry:
@ -397,7 +395,7 @@ for.end:
define void @exp2_f64_intrinsic(double* nocapture %varray) {
; CHECK-LABEL: @exp2_f64_intrinsic(
; CHECK: __exp2d2_P8{{.*}}<2 x double>
; CHECK: __exp2d2{{.*}}<2 x double>
; CHECK: ret void
;
entry:
@ -420,7 +418,7 @@ for.end:
define void @exp2_f32(float* nocapture %varray) {
; CHECK-LABEL: @exp2_f32(
; CHECK: __exp2f4_P8{{.*}}<4 x float>
; CHECK: __exp2f4{{.*}}<4 x float>
; CHECK: ret void
;
entry:
@ -443,7 +441,7 @@ for.end:
define void @exp2_f32_intrinsic(float* nocapture %varray) {
; CHECK-LABEL: @exp2_f32_intrinsic(
; CHECK: __exp2f4_P8{{.*}}<4 x float>
; CHECK: __exp2f4{{.*}}<4 x float>
; CHECK: ret void
;
entry:
@ -466,7 +464,7 @@ for.end:
define void @expm1_f64(double* nocapture %varray) {
; CHECK-LABEL: @expm1_f64(
; CHECK: __expm1d2_P8{{.*}}<2 x double>
; CHECK: __expm1d2{{.*}}<2 x double>
; CHECK: ret void
;
entry:
@ -489,7 +487,7 @@ for.end:
define void @expm1_f32(float* nocapture %varray) {
; CHECK-LABEL: @expm1_f32(
; CHECK: __expm1f4_P8{{.*}}<4 x float>
; CHECK: __expm1f4{{.*}}<4 x float>
; CHECK: ret void
;
entry:
@ -512,7 +510,7 @@ for.end:
define void @log_f64(double* nocapture %varray) {
; CHECK-LABEL: @log_f64(
; CHECK: __logd2_P8{{.*}}<2 x double>
; CHECK: __logd2{{.*}}<2 x double>
; CHECK: ret void
;
entry:
@ -535,7 +533,7 @@ for.end:
define void @log_f64_intrinsic(double* nocapture %varray) {
; CHECK-LABEL: @log_f64_intrinsic(
; CHECK: __logd2_P8{{.*}}<2 x double>
; CHECK: __logd2{{.*}}<2 x double>
; CHECK: ret void
;
entry:
@ -558,7 +556,7 @@ for.end:
define void @log_f32(float* nocapture %varray) {
; CHECK-LABEL: @log_f32(
; CHECK: __logf4_P8{{.*}}<4 x float>
; CHECK: __logf4{{.*}}<4 x float>
; CHECK: ret void
;
entry:
@ -581,7 +579,7 @@ for.end:
define void @log_f32_intrinsic(float* nocapture %varray) {
; CHECK-LABEL: @log_f32_intrinsic(
; CHECK: __logf4_P8{{.*}}<4 x float>
; CHECK: __logf4{{.*}}<4 x float>
; CHECK: ret void
;
entry:
@ -604,7 +602,7 @@ for.end:
define void @log1p_f64(double* nocapture %varray) {
; CHECK-LABEL: @log1p_f64(
; CHECK: __log1pd2_P8{{.*}}<2 x double>
; CHECK: __log1pd2{{.*}}<2 x double>
; CHECK: ret void
;
entry:
@ -627,7 +625,7 @@ for.end:
define void @log1p_f32(float* nocapture %varray) {
; CHECK-LABEL: @log1p_f32(
; CHECK: __log1pf4_P8{{.*}}<4 x float>
; CHECK: __log1pf4{{.*}}<4 x float>
; CHECK: ret void
;
entry:
@ -650,7 +648,7 @@ for.end:
define void @log10_f64(double* nocapture %varray) {
; CHECK-LABEL: @log10_f64(
; CHECK: __log10d2_P8(<2 x double>
; CHECK: __log10d2(<2 x double>
; CHECK: ret void
;
entry:
@ -673,7 +671,7 @@ for.end:
define void @log10_f64_intrinsic(double* nocapture %varray) {
; CHECK-LABEL: @log10_f64_intrinsic(
; CHECK: __log10d2_P8{{.*}}<2 x double>
; CHECK: __log10d2{{.*}}<2 x double>
; CHECK: ret void
;
entry:
@ -696,7 +694,7 @@ for.end:
define void @log10_f32(float* nocapture %varray) {
; CHECK-LABEL: @log10_f32(
; CHECK: __log10f4_P8{{.*}}<4 x float>
; CHECK: __log10f4{{.*}}<4 x float>
; CHECK: ret void
;
entry:
@ -719,7 +717,7 @@ for.end:
define void @log10_f32_intrinsic(float* nocapture %varray) {
; CHECK-LABEL: @log10_f32_intrinsic(
; CHECK: __log10f4_P8{{.*}}<4 x float>
; CHECK: __log10f4{{.*}}<4 x float>
; CHECK: ret void
;
entry:
@ -742,7 +740,7 @@ for.end:
define void @log2_f64(double* nocapture %varray) {
; CHECK-LABEL: @log2_f64(
; CHECK: __log2d2_P8(<2 x double>
; CHECK: __log2d2(<2 x double>
; CHECK: ret void
;
entry:
@ -765,7 +763,7 @@ for.end:
define void @log2_f64_intrinsic(double* nocapture %varray) {
; CHECK-LABEL: @log2_f64_intrinsic(
; CHECK: __log2d2_P8{{.*}}<2 x double>
; CHECK: __log2d2{{.*}}<2 x double>
; CHECK: ret void
;
entry:
@ -788,7 +786,7 @@ for.end:
define void @log2_f32(float* nocapture %varray) {
; CHECK-LABEL: @log2_f32(
; CHECK: __log2f4_P8{{.*}}<4 x float>
; CHECK: __log2f4{{.*}}<4 x float>
; CHECK: ret void
;
entry:
@ -811,7 +809,7 @@ for.end:
define void @log2_f32_intrinsic(float* nocapture %varray) {
; CHECK-LABEL: @log2_f32_intrinsic(
; CHECK: __log2f4_P8{{.*}}<4 x float>
; CHECK: __log2f4{{.*}}<4 x float>
; CHECK: ret void
;
entry:
@ -834,7 +832,7 @@ for.end:
define void @sin_f64(double* nocapture %varray) {
; CHECK-LABEL: @sin_f64(
; CHECK: __sind2_P8{{.*}}<2 x double>
; CHECK: __sind2{{.*}}<2 x double>
; CHECK: ret void
;
entry:
@ -857,7 +855,7 @@ for.end:
define void @sin_f64_intrinsic(double* nocapture %varray) {
; CHECK-LABEL: @sin_f64_intrinsic(
; CHECK: __sind2_P8{{.*}}<2 x double>
; CHECK: __sind2{{.*}}<2 x double>
; CHECK: ret void
;
entry:
@ -880,7 +878,7 @@ for.end:
define void @sin_f32(float* nocapture %varray) {
; CHECK-LABEL: @sin_f32(
; CHECK: __sinf4_P8{{.*}}<4 x float>
; CHECK: __sinf4{{.*}}<4 x float>
; CHECK: ret void
;
entry:
@ -903,7 +901,7 @@ for.end:
define void @sin_f32_intrinsic(float* nocapture %varray) {
; CHECK-LABEL: @sin_f32_intrinsic(
; CHECK: __sinf4_P8{{.*}}<4 x float>
; CHECK: __sinf4{{.*}}<4 x float>
; CHECK: ret void
;
entry:
@ -926,7 +924,7 @@ for.end:
define void @cos_f64(double* nocapture %varray) {
; CHECK-LABEL: @cos_f64(
; CHECK: __cosd2_P8{{.*}}<2 x double>
; CHECK: __cosd2{{.*}}<2 x double>
; CHECK: ret void
;
entry:
@ -949,7 +947,7 @@ for.end:
define void @cos_f64_intrinsic(double* nocapture %varray) {
; CHECK-LABEL: @cos_f64_intrinsic(
; CHECK: [[TMP5:%.*]] = call <2 x double> @__cosd2_P8(<2 x double> [[TMP4:%.*]])
; CHECK: [[TMP5:%.*]] = call <2 x double> @__cosd2(<2 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
@ -972,7 +970,7 @@ for.end:
define void @cos_f32(float* nocapture %varray) {
; CHECK-LABEL: @cos_f32(
; CHECK: __cosf4_P8{{.*}}<4 x float>
; CHECK: __cosf4{{.*}}<4 x float>
; CHECK: ret void
;
entry:
@ -995,7 +993,7 @@ for.end:
define void @cos_f32_intrinsic(float* nocapture %varray) {
; CHECK-LABEL: @cos_f32_intrinsic(
; CHECK: __cosf4_P8{{.*}}<4 x float>
; CHECK: __cosf4{{.*}}<4 x float>
; CHECK: ret void
;
entry:
@ -1018,7 +1016,7 @@ for.end:
define void @tan_f64(double* nocapture %varray) {
; CHECK-LABEL: @tan_f64(
; CHECK: __tand2_P8{{.*}}<2 x double>
; CHECK: __tand2{{.*}}<2 x double>
; CHECK: ret void
;
entry:
@ -1041,7 +1039,7 @@ for.end:
define void @tan_f32(float* nocapture %varray) {
; CHECK-LABEL: @tan_f32(
; CHECK: __tanf4_P8{{.*}}<4 x float>
; CHECK: __tanf4{{.*}}<4 x float>
; CHECK: ret void
;
entry:
@ -1064,7 +1062,7 @@ for.end:
define void @asin_f64(double* nocapture %varray) {
; CHECK-LABEL: @asin_f64(
; CHECK: __asind2_P8{{.*}}<2 x double>
; CHECK: __asind2{{.*}}<2 x double>
; CHECK: ret void
;
entry:
@ -1087,7 +1085,7 @@ for.end:
define void @asin_f32(float* nocapture %varray) {
; CHECK-LABEL: @asin_f32(
; CHECK: __asinf4_P8{{.*}}<4 x float>
; CHECK: __asinf4{{.*}}<4 x float>
; CHECK: ret void
;
entry:
@ -1110,7 +1108,7 @@ for.end:
define void @acos_f64(double* nocapture %varray) {
; CHECK-LABEL: @acos_f64(
; CHECK: __acosd2_P8{{.*}}<2 x double>
; CHECK: __acosd2{{.*}}<2 x double>
; CHECK: ret void
;
entry:
@ -1133,7 +1131,7 @@ for.end:
define void @acos_f32(float* nocapture %varray) {
; CHECK-LABEL: @acos_f32(
; CHECK: __acosf4_P8{{.*}}<4 x float>
; CHECK: __acosf4{{.*}}<4 x float>
; CHECK: ret void
;
entry:
@ -1156,7 +1154,7 @@ for.end:
define void @atan_f64(double* nocapture %varray) {
; CHECK-LABEL: @atan_f64(
; CHECK: __atand2_P8{{.*}}<2 x double>
; CHECK: __atand2{{.*}}<2 x double>
; CHECK: ret void
;
entry:
@ -1179,7 +1177,7 @@ for.end:
define void @atan_f32(float* nocapture %varray) {
; CHECK-LABEL: @atan_f32(
; CHECK: __atanf4_P8{{.*}}<4 x float>
; CHECK: __atanf4{{.*}}<4 x float>
; CHECK: ret void
;
entry:
@ -1202,7 +1200,7 @@ for.end:
define void @atan2_f64(double* nocapture %varray) {
; CHECK-LABEL: @atan2_f64(
; CHECK: __atan2d2_P8{{.*}}<2 x double>
; CHECK: __atan2d2{{.*}}<2 x double>
; CHECK: ret void
;
entry:
@ -1225,7 +1223,7 @@ for.end:
define void @atan2_f32(float* nocapture %varray) {
; CHECK-LABEL: @atan2_f32(
; CHECK: __atan2f4_P8{{.*}}<4 x float>
; CHECK: __atan2f4{{.*}}<4 x float>
; CHECK: ret void
;
entry:
@ -1248,7 +1246,7 @@ for.end:
define void @sinh_f64(double* nocapture %varray) {
; CHECK-LABEL: @sinh_f64(
; CHECK: __sinhd2_P8{{.*}}<2 x double>
; CHECK: __sinhd2{{.*}}<2 x double>
; CHECK: ret void
;
entry:
@ -1271,7 +1269,7 @@ for.end:
define void @sinh_f32(float* nocapture %varray) {
; CHECK-LABEL: @sinh_f32(
; CHECK: __sinhf4_P8{{.*}}<4 x float>
; CHECK: __sinhf4{{.*}}<4 x float>
; CHECK: ret void
;
entry:
@ -1294,7 +1292,7 @@ for.end:
define void @cosh_f64(double* nocapture %varray) {
; CHECK-LABEL: @cosh_f64(
; CHECK: __coshd2_P8{{.*}}<2 x double>
; CHECK: __coshd2{{.*}}<2 x double>
; CHECK: ret void
;
entry:
@ -1317,7 +1315,7 @@ for.end:
define void @cosh_f32(float* nocapture %varray) {
; CHECK-LABEL: @cosh_f32(
; CHECK: __coshf4_P8{{.*}}<4 x float>
; CHECK: __coshf4{{.*}}<4 x float>
; CHECK: ret void
;
entry:
@ -1340,7 +1338,7 @@ for.end:
define void @tanh_f64(double* nocapture %varray) {
; CHECK-LABEL: @tanh_f64(
; CHECK: __tanhd2_P8{{.*}}<2 x double>
; CHECK: __tanhd2{{.*}}<2 x double>
; CHECK: ret void
;
entry:
@ -1363,7 +1361,7 @@ for.end:
define void @tanh_f32(float* nocapture %varray) {
; CHECK-LABEL: @tanh_f32(
; CHECK: __tanhf4_P8{{.*}}<4 x float>
; CHECK: __tanhf4{{.*}}<4 x float>
; CHECK: ret void
;
entry:
@ -1386,7 +1384,7 @@ for.end:
define void @asinh_f64(double* nocapture %varray) {
; CHECK-LABEL: @asinh_f64(
; CHECK: __asinhd2_P8{{.*}}<2 x double>
; CHECK: __asinhd2{{.*}}<2 x double>
; CHECK: ret void
;
entry:
@ -1409,7 +1407,7 @@ for.end:
define void @asinh_f32(float* nocapture %varray) {
; CHECK-LABEL: @asinh_f32(
; CHECK: __asinhf4_P8{{.*}}<4 x float>
; CHECK: __asinhf4{{.*}}<4 x float>
; CHECK: ret void
;
entry:
@ -1432,7 +1430,7 @@ for.end:
define void @acosh_f64(double* nocapture %varray) {
; CHECK-LABEL: @acosh_f64(
; CHECK: __acoshd2_P8{{.*}}<2 x double>
; CHECK: __acoshd2{{.*}}<2 x double>
; CHECK: ret void
;
entry:
@ -1455,7 +1453,7 @@ for.end:
define void @acosh_f32(float* nocapture %varray) {
; CHECK-LABEL: @acosh_f32(
; CHECK: __acoshf4_P8{{.*}}<4 x float>
; CHECK: __acoshf4{{.*}}<4 x float>
; CHECK: ret void
;
entry:
@ -1478,7 +1476,7 @@ for.end:
define void @atanh_f64(double* nocapture %varray) {
; CHECK-LABEL: @atanh_f64(
; CHECK: __atanhd2_P8{{.*}}<2 x double>
; CHECK: __atanhd2{{.*}}<2 x double>
; CHECK: ret void
;
entry:
@ -1501,7 +1499,7 @@ for.end:
define void @atanh_f32(float* nocapture %varray) {
; CHECK-LABEL: @atanh_f32(
; CHECK: __atanhf4_P8{{.*}}<4 x float>
; CHECK: __atanhf4{{.*}}<4 x float>
; CHECK: ret void
;
entry:

View File

@ -9,7 +9,7 @@ declare float @atanhf(float) #1
; Check that functions marked as nobuiltin are not lowered to massv entries.
define void @atanh_f64(double* nocapture %varray) {
; CHECK-LABEL: @atanh_f64(
; CHECK-NOT: __atanhd2_P8{{.*}}<2 x double>
; CHECK-NOT: __atanhd2{{.*}}<2 x double>
; CHECK: ret void
;
entry:
@ -32,7 +32,7 @@ for.end:
define void @atanh_f32(float* nocapture %varray) {
; CHECK-LABEL: @atanh_f32(
; CHECK-NOT: __atanhf4_P8{{.*}}<2 x double>
; CHECK-NOT: __atanhf4{{.*}}<4 x float>
; CHECK: ret void
;
entry:

View File

@ -14,6 +14,7 @@ define void @ceil_f64(double* nocapture %varray) {
; CHECK-LABEL: @ceil_f64(
; CHECK-NOT: __ceild2_massv{{.*}}<2 x double>
; CHECK-NOT: __ceild2_P8{{.*}}<2 x double>
; CHECK-NOT: __ceild2{{.*}}<2 x double>
; CHECK: ret void
;
entry:
@ -39,6 +40,7 @@ define void @fabs_f32(float* nocapture %varray) {
; CHECK-LABEL: @fabs_f32(
; CHECK-NOT: __fabsf4_massv{{.*}}<4 x float>
; CHECK-NOT: __fabsf4_P8{{.*}}<4 x float>
; CHECK-NOT: __fabsf4{{.*}}<4 x float>
; CHECK: ret void
;
entry:

View File

@ -15,7 +15,7 @@ define dso_local double @test(float* %Arr) {
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[TMP1]] to <2 x float>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP2]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = fpext <2 x float> [[WIDE_LOAD]] to <2 x double>
; CHECK-NEXT: [[TMP4:%.*]] = call fast <2 x double> @__sind2_P8(<2 x double> [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call fast <2 x double> @__sind2(<2 x double> [[TMP3]])
; CHECK-NEXT: [[TMP5]] = fadd fast <2 x double> [[TMP4]], [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128

View File

@ -19,8 +19,8 @@ target triple = "x86_64-unknown-linux-gnu"
; SVML-SAME: i8* bitcast (<8 x float> (<8 x float>)* @__svml_log10f8 to i8*),
; SVML-SAME: i8* bitcast (<16 x float> (<16 x float>)* @__svml_log10f16 to i8*)
; MASSV-SAME: [2 x i8*] [
; MASSV-SAME: i8* bitcast (<2 x double> (<2 x double>)* @__sind2_P8 to i8*),
; MASSV-SAME: i8* bitcast (<4 x float> (<4 x float>)* @__log10f4_P8 to i8*)
; MASSV-SAME: i8* bitcast (<2 x double> (<2 x double>)* @__sind2 to i8*),
; MASSV-SAME: i8* bitcast (<4 x float> (<4 x float>)* @__log10f4 to i8*)
; ACCELERATE-SAME: [1 x i8*] [
; ACCELERATE-SAME: i8* bitcast (<4 x float> (<4 x float>)* @vlog10f to i8*)
; LIBMVEC-X86-SAME: [2 x i8*] [
@ -64,9 +64,9 @@ attributes #0 = { nounwind readnone }
; SVML-SAME: _ZGV_LLVM_N8v_sin(__svml_sin8)" }
; MASSV: attributes #[[SIN]] = { "vector-function-abi-variant"=
; MASSV-SAME: "_ZGV_LLVM_N2v_sin(__sind2_P8)" }
; MASSV-SAME: "_ZGV_LLVM_N2v_sin(__sind2)" }
; MASSV: attributes #[[LOG10]] = { "vector-function-abi-variant"=
; MASSV-SAME: "_ZGV_LLVM_N4v_llvm.log10.f32(__log10f4_P8)" }
; MASSV-SAME: "_ZGV_LLVM_N4v_llvm.log10.f32(__log10f4)" }
; ACCELERATE: attributes #[[LOG10]] = { "vector-function-abi-variant"=
; ACCELERATE-SAME: "_ZGV_LLVM_N4v_llvm.log10.f32(vlog10f)" }