Implement aarch64 neon instruction set AdvSIMD (Across).
llvm-svn: 192029
parent ad242fbb71
commit b96ebac02b
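Taken together, the patch wires up the AdvSIMD across-lanes intrinsics (vaddlv, vaddv, vmaxv, vminv, vmaxnmv, vminnmv) end to end: arm_neon.td definitions with a new 'r' prototype modifier, CodeGen lowering in CGBuiltin.cpp, NeonEmitter support, and CodeGen tests. A minimal usage sketch (not part of the patch; assumes an AArch64 toolchain providing this arm_neon.h):

    #include <arm_neon.h>

    int main(void) {
      int8x8_t v = vdup_n_s8(3);   // eight int8 lanes, each 3
      int16_t sum = vaddlv_s8(v);  // widening add across lanes -> 8 * 3 = 24
      int8_t max = vmaxv_s8(v);    // maximum across lanes -> 3
      return (sum == 24 && max == 3) ? 0 : 1;
    }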
@@ -165,6 +165,7 @@ class NoTestOpInst<string n, string p, string t, Op o> : Inst<n, p, t, o> {}
 // i: constant int
 // l: constant uint64
 // s: scalar of element type
+// r: scalar of double width element type
 // a: scalar of element type (splat to vector type)
 // k: default elt width, double num elts
 // #: array of default vectors
@@ -696,6 +697,15 @@ def VQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "ddji", "siQsQi", OP_QRDMULH_LN>;
 def VMULX_LANE : IOpInst<"vmulx_lane", "ddgi", "fdQfQd", OP_MULX_LN>;
 def VMULX_LANEQ : IOpInst<"vmulx_laneq", "ddji", "fdQfQd", OP_MULX_LN>;
 
+////////////////////////////////////////////////////////////////////////////////
+// Across vectors class
+def VADDLV : SInst<"vaddlv", "rd", "csiUcUsUiQcQsQiQUcQUsQUi">;
+def VMAXV : SInst<"vmaxv", "sd", "csiUcUsUiQcQsQiQUcQUsQUiQf">;
+def VMINV : SInst<"vminv", "sd", "csiUcUsUiQcQsQiQUcQUsQUiQf">;
+def VADDV : SInst<"vaddv", "sd", "csiUcUsUiQcQsQiQUcQUsQUi">;
+def FMAXNMV : SInst<"vmaxnmv", "sd", "Qf">;
+def FMINNMV : SInst<"vminnmv", "sd", "Qf">;
+
 ////////////////////////////////////////////////////////////////////////////////
 // Scalar Arithmetic
 
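In these prototype strings the first character is the return type and the rest are the arguments: 'd' is the default vector, 's' a scalar of the element type, and the new 'r' (defined in the hunk above) a scalar of the double-width element type. Concretely, for the int8 variants the generated declarations come out as below (a sketch; the return types match the tests added later in this commit):

    // "sd": scalar result with the element type
    int8_t  vmaxv_s8(int8x8_t a);
    // "rd": scalar result widened to double the element width
    int16_t vaddlv_s8(int8x8_t a);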
@@ -1748,209 +1748,245 @@ CodeGenFunction::EmitPointerWithAlignment(const Expr *Addr) {
 static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
                                            unsigned BuiltinID,
                                            const CallExpr *E) {
-  NeonTypeFlags::EltType ET;
-  bool usgn;
   unsigned int Int = 0;
-  bool OverloadInt = true;
+  // Scalar result generated across vectors
+  bool AcrossVec = false;
+  // Extend element of one-element vector
+  bool ExtendEle = false;
+  bool OverloadInt = false;
   const char *s = NULL;
 
-  SmallVector<Value *, 4> Ops;
-  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
-    Ops.push_back(CGF.EmitScalarExpr(E->getArg(i)));
-  }
-
   // AArch64 scalar builtins are not overloaded, they do not have an extra
   // argument that specifies the vector type, need to handle each case.
   switch (BuiltinID) {
   default: break;
   // Scalar Add
   case AArch64::BI__builtin_neon_vaddd_s64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vaddds;
-    s = "vaddds"; usgn = false; OverloadInt = false; break;
+    Int = Intrinsic::aarch64_neon_vaddds;
+    s = "vaddds"; break;
   case AArch64::BI__builtin_neon_vaddd_u64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vadddu;
-    s = "vadddu"; usgn = true; OverloadInt = false; break;
+    Int = Intrinsic::aarch64_neon_vadddu;
+    s = "vadddu"; break;
   // Scalar Sub
   case AArch64::BI__builtin_neon_vsubd_s64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vsubds;
-    s = "vsubds"; usgn = false; OverloadInt = false; break;
+    Int = Intrinsic::aarch64_neon_vsubds;
+    s = "vsubds"; break;
   case AArch64::BI__builtin_neon_vsubd_u64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vsubdu;
-    s = "vsubdu"; usgn = true; OverloadInt = false; break;
+    Int = Intrinsic::aarch64_neon_vsubdu;
+    s = "vsubdu"; break;
   // Scalar Saturating Add
   case AArch64::BI__builtin_neon_vqaddb_s8:
-    ET = NeonTypeFlags::Int8; Int = Intrinsic::aarch64_neon_vqadds;
-    s = "vqadds"; usgn = false; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqaddh_s16:
-    ET = NeonTypeFlags::Int16; Int = Intrinsic::aarch64_neon_vqadds;
-    s = "vqadds"; usgn = false; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqadds_s32:
-    ET = NeonTypeFlags::Int32; Int = Intrinsic::aarch64_neon_vqadds;
-    s = "vqadds"; usgn = false; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqaddd_s64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vqadds;
-    s = "vqadds"; usgn = false; OverloadInt = true; break;
+    Int = Intrinsic::aarch64_neon_vqadds;
+    s = "vqadds"; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqaddb_u8:
-    ET = NeonTypeFlags::Int8; Int = Intrinsic::aarch64_neon_vqaddu;
-    s = "vqaddu"; usgn = true; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqaddh_u16:
-    ET = NeonTypeFlags::Int16; Int = Intrinsic::aarch64_neon_vqaddu;
-    s = "vqaddu"; usgn = true; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqadds_u32:
-    ET = NeonTypeFlags::Int32; Int = Intrinsic::aarch64_neon_vqaddu;
-    s = "vqaddu"; usgn = true; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqaddd_u64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vqaddu;
-    s = "vqaddu"; usgn = true; OverloadInt = true; break;
+    Int = Intrinsic::aarch64_neon_vqaddu;
+    s = "vqaddu"; OverloadInt = true; break;
   // Scalar Saturating Sub
   case AArch64::BI__builtin_neon_vqsubb_s8:
-    ET = NeonTypeFlags::Int8; Int = Intrinsic::aarch64_neon_vqsubs;
-    s = "vqsubs"; usgn = false; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqsubh_s16:
-    ET = NeonTypeFlags::Int16; Int = Intrinsic::aarch64_neon_vqsubs;
-    s = "vqsubs"; usgn = false; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqsubs_s32:
-    ET = NeonTypeFlags::Int32; Int = Intrinsic::aarch64_neon_vqsubs;
-    s = "vqsubs"; usgn = false; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqsubd_s64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vqsubs;
-    s = "vqsubs"; usgn = false; OverloadInt = true; break;
+    Int = Intrinsic::aarch64_neon_vqsubs;
+    s = "vqsubs"; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqsubb_u8:
-    ET = NeonTypeFlags::Int8; Int = Intrinsic::aarch64_neon_vqsubu;
-    s = "vqsubu"; usgn = true; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqsubh_u16:
-    ET = NeonTypeFlags::Int16; Int = Intrinsic::aarch64_neon_vqsubu;
-    s = "vqsubu"; usgn = true; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqsubs_u32:
-    ET = NeonTypeFlags::Int32; Int = Intrinsic::aarch64_neon_vqsubu;
-    s = "vqsubu"; usgn = true; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqsubd_u64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vqsubu;
-    s = "vqsubu"; usgn = true; OverloadInt = true; break;
+    Int = Intrinsic::aarch64_neon_vqsubu;
+    s = "vqsubu"; OverloadInt = true; break;
   // Scalar Shift Left
   case AArch64::BI__builtin_neon_vshld_s64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vshlds;
-    s = "vshlds"; usgn = false; OverloadInt=false; break;
+    Int = Intrinsic::aarch64_neon_vshlds;
+    s = "vshlds"; break;
   case AArch64::BI__builtin_neon_vshld_u64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vshldu;
-    s = "vshldu"; usgn = true; OverloadInt = false; break;
+    Int = Intrinsic::aarch64_neon_vshldu;
+    s = "vshldu"; break;
   // Scalar Saturating Shift Left
   case AArch64::BI__builtin_neon_vqshlb_s8:
-    ET = NeonTypeFlags::Int8; Int = Intrinsic::aarch64_neon_vqshls;
-    s = "vqshls"; usgn = false; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqshlh_s16:
-    ET = NeonTypeFlags::Int16; Int = Intrinsic::aarch64_neon_vqshls;
-    s = "vqshls"; usgn = false; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqshls_s32:
-    ET = NeonTypeFlags::Int32; Int = Intrinsic::aarch64_neon_vqshls;
-    s = "vqshls"; usgn = false; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqshld_s64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vqshls;
-    s = "vqshls"; usgn = false; OverloadInt = true; break;
+    Int = Intrinsic::aarch64_neon_vqshls;
+    s = "vqshls"; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqshlb_u8:
-    ET = NeonTypeFlags::Int8; Int = Intrinsic::aarch64_neon_vqshlu;
-    s = "vqshlu"; usgn = true; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqshlh_u16:
-    ET = NeonTypeFlags::Int16; Int = Intrinsic::aarch64_neon_vqshlu;
-    s = "vqshlu"; usgn = true; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqshls_u32:
-    ET = NeonTypeFlags::Int32; Int = Intrinsic::aarch64_neon_vqshlu;
-    s = "vqshlu"; usgn = true; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqshld_u64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vqshlu;
-    s = "vqshlu"; usgn = true; OverloadInt = true; break;
+    Int = Intrinsic::aarch64_neon_vqshlu;
+    s = "vqshlu"; OverloadInt = true; break;
   // Scalar Rouding Shift Left
   case AArch64::BI__builtin_neon_vrshld_s64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vrshlds;
-    s = "vrshlds"; usgn = false; OverloadInt=false; break;
+    Int = Intrinsic::aarch64_neon_vrshlds;
+    s = "vrshlds"; break;
   case AArch64::BI__builtin_neon_vrshld_u64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vrshldu;
-    s = "vrshldu"; usgn = true; OverloadInt=false; break;
+    Int = Intrinsic::aarch64_neon_vrshldu;
+    s = "vrshldu"; break;
   // Scalar Saturating Rouding Shift Left
   case AArch64::BI__builtin_neon_vqrshlb_s8:
-    ET = NeonTypeFlags::Int8; Int = Intrinsic::aarch64_neon_vqrshls;
-    s = "vqrshls"; usgn = false; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqrshlh_s16:
-    ET = NeonTypeFlags::Int16; Int = Intrinsic::aarch64_neon_vqrshls;
-    s = "vqrshls"; usgn = false; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqrshls_s32:
-    ET = NeonTypeFlags::Int32; Int = Intrinsic::aarch64_neon_vqrshls;
-    s = "vqrshls"; usgn = false; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqrshld_s64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vqrshls;
-    s = "vqrshls"; usgn = false; OverloadInt = true; break;
+    Int = Intrinsic::aarch64_neon_vqrshls;
+    s = "vqrshls"; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqrshlb_u8:
-    ET = NeonTypeFlags::Int8; Int = Intrinsic::aarch64_neon_vqrshlu;
-    s = "vqrshlu"; usgn = true; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqrshlh_u16:
-    ET = NeonTypeFlags::Int16; Int = Intrinsic::aarch64_neon_vqrshlu;
-    s = "vqrshlu"; usgn = true; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqrshls_u32:
-    ET = NeonTypeFlags::Int32; Int = Intrinsic::aarch64_neon_vqrshlu;
-    s = "vqrshlu"; usgn = true; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqrshld_u64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vqrshlu;
-    s = "vqrshlu"; usgn = true; OverloadInt = true; break;
+    Int = Intrinsic::aarch64_neon_vqrshlu;
+    s = "vqrshlu"; OverloadInt = true; break;
   // Scalar Reduce Pairwise Add
   case AArch64::BI__builtin_neon_vpaddd_s64:
     Int = Intrinsic::aarch64_neon_vpadd; s = "vpadd";
-    OverloadInt = false; break;
+    break;
   case AArch64::BI__builtin_neon_vpadds_f32:
     Int = Intrinsic::aarch64_neon_vpfadd; s = "vpfadd";
-    OverloadInt = false; break;
+    break;
   case AArch64::BI__builtin_neon_vpaddd_f64:
     Int = Intrinsic::aarch64_neon_vpfaddq; s = "vpfaddq";
-    OverloadInt = false; break;
+    break;
   // Scalar Reduce Pairwise Floating Point Max
   case AArch64::BI__builtin_neon_vpmaxs_f32:
     Int = Intrinsic::aarch64_neon_vpmax; s = "vpmax";
-    OverloadInt = false; break;
+    break;
   case AArch64::BI__builtin_neon_vpmaxqd_f64:
     Int = Intrinsic::aarch64_neon_vpmaxq; s = "vpmaxq";
-    OverloadInt = false; break;
+    break;
   // Scalar Reduce Pairwise Floating Point Min
   case AArch64::BI__builtin_neon_vpmins_f32:
     Int = Intrinsic::aarch64_neon_vpmin; s = "vpmin";
-    OverloadInt = false; break;
+    break;
   case AArch64::BI__builtin_neon_vpminqd_f64:
     Int = Intrinsic::aarch64_neon_vpminq; s = "vpminq";
-    OverloadInt = false; break;
+    break;
   // Scalar Reduce Pairwise Floating Point Maxnm
   case AArch64::BI__builtin_neon_vpmaxnms_f32:
     Int = Intrinsic::aarch64_neon_vpfmaxnm; s = "vpfmaxnm";
-    OverloadInt = false; break;
+    break;
   case AArch64::BI__builtin_neon_vpmaxnmqd_f64:
     Int = Intrinsic::aarch64_neon_vpfmaxnmq; s = "vpfmaxnmq";
-    OverloadInt = false; break;
-  // Scalar Reduce Pairwise Floating Point Minnm
+    break;
+  // Scalar Reduce Pairwise Floating Point Minnm
   case AArch64::BI__builtin_neon_vpminnms_f32:
     Int = Intrinsic::aarch64_neon_vpfminnm; s = "vpfminnm";
-    OverloadInt = false; break;
+    break;
   case AArch64::BI__builtin_neon_vpminnmqd_f64:
     Int = Intrinsic::aarch64_neon_vpfminnmq; s = "vpfminnmq";
-    OverloadInt = false; break;
+    break;
+  // The followings are intrinsics with scalar results generated AcrossVec vectors
+  case AArch64::BI__builtin_neon_vaddlv_s8:
+  case AArch64::BI__builtin_neon_vaddlv_s16:
+  case AArch64::BI__builtin_neon_vaddlvq_s8:
+  case AArch64::BI__builtin_neon_vaddlvq_s16:
+  case AArch64::BI__builtin_neon_vaddlvq_s32:
+    Int = Intrinsic::aarch64_neon_saddlv;
+    AcrossVec = true; ExtendEle = true; s = "saddlv"; break;
+  case AArch64::BI__builtin_neon_vaddlv_u8:
+  case AArch64::BI__builtin_neon_vaddlv_u16:
+  case AArch64::BI__builtin_neon_vaddlvq_u8:
+  case AArch64::BI__builtin_neon_vaddlvq_u16:
+  case AArch64::BI__builtin_neon_vaddlvq_u32:
+    Int = Intrinsic::aarch64_neon_uaddlv;
+    AcrossVec = true; ExtendEle = true; s = "uaddlv"; break;
+  case AArch64::BI__builtin_neon_vmaxv_s8:
+  case AArch64::BI__builtin_neon_vmaxv_s16:
+  case AArch64::BI__builtin_neon_vmaxvq_s8:
+  case AArch64::BI__builtin_neon_vmaxvq_s16:
+  case AArch64::BI__builtin_neon_vmaxvq_s32:
+    Int = Intrinsic::aarch64_neon_smaxv;
+    AcrossVec = true; ExtendEle = false; s = "smaxv"; break;
+  case AArch64::BI__builtin_neon_vmaxv_u8:
+  case AArch64::BI__builtin_neon_vmaxv_u16:
+  case AArch64::BI__builtin_neon_vmaxvq_u8:
+  case AArch64::BI__builtin_neon_vmaxvq_u16:
+  case AArch64::BI__builtin_neon_vmaxvq_u32:
+    Int = Intrinsic::aarch64_neon_umaxv;
+    AcrossVec = true; ExtendEle = false; s = "umaxv"; break;
+  case AArch64::BI__builtin_neon_vminv_s8:
+  case AArch64::BI__builtin_neon_vminv_s16:
+  case AArch64::BI__builtin_neon_vminvq_s8:
+  case AArch64::BI__builtin_neon_vminvq_s16:
+  case AArch64::BI__builtin_neon_vminvq_s32:
+    Int = Intrinsic::aarch64_neon_sminv;
+    AcrossVec = true; ExtendEle = false; s = "sminv"; break;
+  case AArch64::BI__builtin_neon_vminv_u8:
+  case AArch64::BI__builtin_neon_vminv_u16:
+  case AArch64::BI__builtin_neon_vminvq_u8:
+  case AArch64::BI__builtin_neon_vminvq_u16:
+  case AArch64::BI__builtin_neon_vminvq_u32:
+    Int = Intrinsic::aarch64_neon_uminv;
+    AcrossVec = true; ExtendEle = false; s = "uminv"; break;
+  case AArch64::BI__builtin_neon_vaddv_s8:
+  case AArch64::BI__builtin_neon_vaddv_s16:
+  case AArch64::BI__builtin_neon_vaddvq_s8:
+  case AArch64::BI__builtin_neon_vaddvq_s16:
+  case AArch64::BI__builtin_neon_vaddvq_s32:
+  case AArch64::BI__builtin_neon_vaddv_u8:
+  case AArch64::BI__builtin_neon_vaddv_u16:
+  case AArch64::BI__builtin_neon_vaddvq_u8:
+  case AArch64::BI__builtin_neon_vaddvq_u16:
+  case AArch64::BI__builtin_neon_vaddvq_u32:
+    Int = Intrinsic::aarch64_neon_vaddv;
+    AcrossVec = true; ExtendEle = false; s = "vaddv"; break;
+  case AArch64::BI__builtin_neon_vmaxvq_f32:
+    Int = Intrinsic::aarch64_neon_vmaxv;
+    AcrossVec = true; ExtendEle = false; s = "vmaxv"; break;
+  case AArch64::BI__builtin_neon_vminvq_f32:
+    Int = Intrinsic::aarch64_neon_vminv;
+    AcrossVec = true; ExtendEle = false; s = "vminv"; break;
+  case AArch64::BI__builtin_neon_vmaxnmvq_f32:
+    Int = Intrinsic::aarch64_neon_vmaxnmv;
+    AcrossVec = true; ExtendEle = false; s = "vmaxnmv"; break;
+  case AArch64::BI__builtin_neon_vminnmvq_f32:
+    Int = Intrinsic::aarch64_neon_vminnmv;
+    AcrossVec = true; ExtendEle = false; s = "vminnmv"; break;
   }
 
   if (!Int)
     return 0;
 
   // AArch64 scalar builtin that returns scalar type
-  // and should be mapped to AArch64 intrinsic that takes
-  // one-element vector type arguments and returns
+  // and should be mapped to AArch64 intrinsic that returns
   // one-element vector type.
-  llvm::Type *Ty = 0;
   Function *F = 0;
-  if (OverloadInt) {
+  SmallVector<Value *, 4> Ops;
+  if (AcrossVec) {
+    // Gen arg type
+    const Expr *Arg = E->getArg(E->getNumArgs()-1);
+    llvm::Type *Ty = CGF.ConvertType(Arg->getType());
+    llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
+    llvm::Type *ETy = VTy->getElementType();
+    llvm::VectorType *RTy = llvm::VectorType::get(ETy, 1);
+
+    if (ExtendEle) {
+      assert(!ETy->isFloatingPointTy());
+      RTy = llvm::VectorType::getExtendedElementVectorType(RTy);
+    }
+
+    llvm::Type *Tys[2] = {RTy, VTy};
+    F = CGF.CGM.getIntrinsic(Int, Tys);
+    assert(E->getNumArgs() == 1);
+  }
+  else if (OverloadInt) {
     // Determine the type of this overloaded AArch64 intrinsic
-    NeonTypeFlags Type(ET, usgn, false);
-    llvm::VectorType *VTy = GetNeonType(&CGF, Type, true);
-    Ty = VTy;
-    if (!Ty)
-      return 0;
-    F = CGF.CGM.getIntrinsic(Int, Ty);
+    const Expr *Arg = E->getArg(E->getNumArgs()-1);
+    llvm::Type *Ty = CGF.ConvertType(Arg->getType());
+    llvm::VectorType *VTy = llvm::VectorType::get(Ty, 1);
+    assert(VTy);
+
+    F = CGF.CGM.getIntrinsic(Int, VTy);
   } else
     F = CGF.CGM.getIntrinsic(Int);
 
+  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
+    Ops.push_back(CGF.EmitScalarExpr(E->getArg(i)));
+  }
+
   Value *Result = CGF.EmitNeonCall(F, Ops, s);
   llvm::Type *ResultType = CGF.ConvertType(E->getType());
   // AArch64 intrinsic one-element vector type cast to
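The new AcrossVec path takes the vector argument's type, builds a one-element result vector type (widened via getExtendedElementVectorType when ExtendEle is set, as for vaddlv), overloads the intrinsic on that result/argument type pair, and the code following this hunk casts the one-element vector back to the scalar result type. Semantically the reductions behave like the following reference loops (an illustrative sketch using clang's vector-subscript extension, not code from the patch):

    #include <arm_neon.h>

    // vaddlv_s8: ExtendEle case, int8 lanes accumulate into a widened int16.
    static int16_t ref_vaddlv_s8(int8x8_t a) {
      int16_t sum = 0;
      for (int i = 0; i < 8; ++i)
        sum += a[i];
      return sum;
    }

    // vmaxv_s8: non-extending case, the result keeps the int8 element width.
    static int8_t ref_vmaxv_s8(int8x8_t a) {
      int8_t m = a[0];
      for (int i = 1; i < 8; ++i)
        if (a[i] > m)
          m = a[i];
      return m;
    }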
@@ -1959,7 +1995,7 @@ static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
 }
 
 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
-                                           const CallExpr *E) {
+                                               const CallExpr *E) {
 
   // Process AArch64 scalar builtins
   if (Value *Result = EmitAArch64ScalarBuiltinExpr(*this, BuiltinID, E))
@@ -0,0 +1,271 @@
// REQUIRES: aarch64-registered-target
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
// RUN:   -ffp-contract=fast -S -O3 -o - %s | FileCheck %s

// Test new aarch64 intrinsics and types

#include <arm_neon.h>

int16_t test_vaddlv_s8(int8x8_t a) {
  // CHECK: test_vaddlv_s8
  return vaddlv_s8(a);
  // CHECK: saddlv {{h[0-9]+}}, {{v[0-9]+}}.8b
}

int32_t test_vaddlv_s16(int16x4_t a) {
  // CHECK: test_vaddlv_s16
  return vaddlv_s16(a);
  // CHECK: saddlv {{s[0-9]+}}, {{v[0-9]+}}.4h
}

uint16_t test_vaddlv_u8(uint8x8_t a) {
  // CHECK: test_vaddlv_u8
  return vaddlv_u8(a);
  // CHECK: uaddlv {{h[0-9]+}}, {{v[0-9]+}}.8b
}

uint32_t test_vaddlv_u16(uint16x4_t a) {
  // CHECK: test_vaddlv_u16
  return vaddlv_u16(a);
  // CHECK: uaddlv {{s[0-9]+}}, {{v[0-9]+}}.4h
}

int16_t test_vaddlvq_s8(int8x16_t a) {
  // CHECK: test_vaddlvq_s8
  return vaddlvq_s8(a);
  // CHECK: saddlv {{h[0-9]+}}, {{v[0-9]+}}.16b
}

int32_t test_vaddlvq_s16(int16x8_t a) {
  // CHECK: test_vaddlvq_s16
  return vaddlvq_s16(a);
  // CHECK: saddlv {{s[0-9]+}}, {{v[0-9]+}}.8h
}

int64_t test_vaddlvq_s32(int32x4_t a) {
  // CHECK: test_vaddlvq_s32
  return vaddlvq_s32(a);
  // CHECK: saddlv {{d[0-9]+}}, {{v[0-9]+}}.4s
}

uint16_t test_vaddlvq_u8(uint8x16_t a) {
  // CHECK: test_vaddlvq_u8
  return vaddlvq_u8(a);
  // CHECK: uaddlv {{h[0-9]+}}, {{v[0-9]+}}.16b
}

uint32_t test_vaddlvq_u16(uint16x8_t a) {
  // CHECK: test_vaddlvq_u16
  return vaddlvq_u16(a);
  // CHECK: uaddlv {{s[0-9]+}}, {{v[0-9]+}}.8h
}

uint64_t test_vaddlvq_u32(uint32x4_t a) {
  // CHECK: test_vaddlvq_u32
  return vaddlvq_u32(a);
  // CHECK: uaddlv {{d[0-9]+}}, {{v[0-9]+}}.4s
}

int8_t test_vmaxv_s8(int8x8_t a) {
  // CHECK: test_vmaxv_s8
  return vmaxv_s8(a);
  // CHECK: smaxv {{b[0-9]+}}, {{v[0-9]+}}.8b
}

int16_t test_vmaxv_s16(int16x4_t a) {
  // CHECK: test_vmaxv_s16
  return vmaxv_s16(a);
  // CHECK: smaxv {{h[0-9]+}}, {{v[0-9]+}}.4h
}

uint8_t test_vmaxv_u8(uint8x8_t a) {
  // CHECK: test_vmaxv_u8
  return vmaxv_u8(a);
  // CHECK: umaxv {{b[0-9]+}}, {{v[0-9]+}}.8b
}

uint16_t test_vmaxv_u16(uint16x4_t a) {
  // CHECK: test_vmaxv_u16
  return vmaxv_u16(a);
  // CHECK: umaxv {{h[0-9]+}}, {{v[0-9]+}}.4h
}

int8_t test_vmaxvq_s8(int8x16_t a) {
  // CHECK: test_vmaxvq_s8
  return vmaxvq_s8(a);
  // CHECK: smaxv {{b[0-9]+}}, {{v[0-9]+}}.16b
}

int16_t test_vmaxvq_s16(int16x8_t a) {
  // CHECK: test_vmaxvq_s16
  return vmaxvq_s16(a);
  // CHECK: smaxv {{h[0-9]+}}, {{v[0-9]+}}.8h
}

int32_t test_vmaxvq_s32(int32x4_t a) {
  // CHECK: test_vmaxvq_s32
  return vmaxvq_s32(a);
  // CHECK: smaxv {{s[0-9]+}}, {{v[0-9]+}}.4s
}

uint8_t test_vmaxvq_u8(uint8x16_t a) {
  // CHECK: test_vmaxvq_u8
  return vmaxvq_u8(a);
  // CHECK: umaxv {{b[0-9]+}}, {{v[0-9]+}}.16b
}

uint16_t test_vmaxvq_u16(uint16x8_t a) {
  // CHECK: test_vmaxvq_u16
  return vmaxvq_u16(a);
  // CHECK: umaxv {{h[0-9]+}}, {{v[0-9]+}}.8h
}

uint32_t test_vmaxvq_u32(uint32x4_t a) {
  // CHECK: test_vmaxvq_u32
  return vmaxvq_u32(a);
  // CHECK: umaxv {{s[0-9]+}}, {{v[0-9]+}}.4s
}

int8_t test_vminv_s8(int8x8_t a) {
  // CHECK: test_vminv_s8
  return vminv_s8(a);
  // CHECK: sminv {{b[0-9]+}}, {{v[0-9]+}}.8b
}

int16_t test_vminv_s16(int16x4_t a) {
  // CHECK: test_vminv_s16
  return vminv_s16(a);
  // CHECK: sminv {{h[0-9]+}}, {{v[0-9]+}}.4h
}

uint8_t test_vminv_u8(uint8x8_t a) {
  // CHECK: test_vminv_u8
  return vminv_u8(a);
  // CHECK: uminv {{b[0-9]+}}, {{v[0-9]+}}.8b
}

uint16_t test_vminv_u16(uint16x4_t a) {
  // CHECK: test_vminv_u16
  return vminv_u16(a);
  // CHECK: uminv {{h[0-9]+}}, {{v[0-9]+}}.4h
}

int8_t test_vminvq_s8(int8x16_t a) {
  // CHECK: test_vminvq_s8
  return vminvq_s8(a);
  // CHECK: sminv {{b[0-9]+}}, {{v[0-9]+}}.16b
}

int16_t test_vminvq_s16(int16x8_t a) {
  // CHECK: test_vminvq_s16
  return vminvq_s16(a);
  // CHECK: sminv {{h[0-9]+}}, {{v[0-9]+}}.8h
}

int32_t test_vminvq_s32(int32x4_t a) {
  // CHECK: test_vminvq_s32
  return vminvq_s32(a);
  // CHECK: sminv {{s[0-9]+}}, {{v[0-9]+}}.4s
}

uint8_t test_vminvq_u8(uint8x16_t a) {
  // CHECK: test_vminvq_u8
  return vminvq_u8(a);
  // CHECK: uminv {{b[0-9]+}}, {{v[0-9]+}}.16b
}

uint16_t test_vminvq_u16(uint16x8_t a) {
  // CHECK: test_vminvq_u16
  return vminvq_u16(a);
  // CHECK: uminv {{h[0-9]+}}, {{v[0-9]+}}.8h
}

uint32_t test_vminvq_u32(uint32x4_t a) {
  // CHECK: test_vminvq_u32
  return vminvq_u32(a);
  // CHECK: uminv {{s[0-9]+}}, {{v[0-9]+}}.4s
}

int8_t test_vaddv_s8(int8x8_t a) {
  // CHECK: test_vaddv_s8
  return vaddv_s8(a);
  // CHECK: addv {{b[0-9]+}}, {{v[0-9]+}}.8b
}

int16_t test_vaddv_s16(int16x4_t a) {
  // CHECK: test_vaddv_s16
  return vaddv_s16(a);
  // CHECK: addv {{h[0-9]+}}, {{v[0-9]+}}.4h
}

uint8_t test_vaddv_u8(uint8x8_t a) {
  // CHECK: test_vaddv_u8
  return vaddv_u8(a);
  // CHECK: addv {{b[0-9]+}}, {{v[0-9]+}}.8b
}

uint16_t test_vaddv_u16(uint16x4_t a) {
  // CHECK: test_vaddv_u16
  return vaddv_u16(a);
  // CHECK: addv {{h[0-9]+}}, {{v[0-9]+}}.4h
}

int8_t test_vaddvq_s8(int8x16_t a) {
  // CHECK: test_vaddvq_s8
  return vaddvq_s8(a);
  // CHECK: addv {{b[0-9]+}}, {{v[0-9]+}}.16b
}

int16_t test_vaddvq_s16(int16x8_t a) {
  // CHECK: test_vaddvq_s16
  return vaddvq_s16(a);
  // CHECK: addv {{h[0-9]+}}, {{v[0-9]+}}.8h
}

int32_t test_vaddvq_s32(int32x4_t a) {
  // CHECK: test_vaddvq_s32
  return vaddvq_s32(a);
  // CHECK: addv {{s[0-9]+}}, {{v[0-9]+}}.4s
}

uint8_t test_vaddvq_u8(uint8x16_t a) {
  // CHECK: test_vaddvq_u8
  return vaddvq_u8(a);
  // CHECK: addv {{b[0-9]+}}, {{v[0-9]+}}.16b
}

uint16_t test_vaddvq_u16(uint16x8_t a) {
  // CHECK: test_vaddvq_u16
  return vaddvq_u16(a);
  // CHECK: addv {{h[0-9]+}}, {{v[0-9]+}}.8h
}

uint32_t test_vaddvq_u32(uint32x4_t a) {
  // CHECK: test_vaddvq_u32
  return vaddvq_u32(a);
  // CHECK: addv {{s[0-9]+}}, {{v[0-9]+}}.4s
}

float32_t test_vmaxvq_f32(float32x4_t a) {
  // CHECK: test_vmaxvq_f32
  return vmaxvq_f32(a);
  // CHECK: fmaxv {{s[0-9]+}}, {{v[0-9]+}}.4s
}

float32_t test_vminvq_f32(float32x4_t a) {
  // CHECK: test_vminvq_f32
  return vminvq_f32(a);
  // CHECK: fminv {{s[0-9]+}}, {{v[0-9]+}}.4s
}

float32_t test_vmaxnmvq_f32(float32x4_t a) {
  // CHECK: test_vmaxnmvq_f32
  return vmaxnmvq_f32(a);
  // CHECK: fmaxnmv {{s[0-9]+}}, {{v[0-9]+}}.4s
}

float32_t test_vminnmvq_f32(float32x4_t a) {
  // CHECK: test_vminnmvq_f32
  return vminnmvq_f32(a);
  // CHECK: fminnmv {{s[0-9]+}}, {{v[0-9]+}}.4s
}
@@ -484,6 +484,8 @@ static char ModType(const char mod, char type, bool &quad, bool &poly,
     scal = true;
     usgn = true;
     break;
+  case 'r':
+    type = Widen(type);
   case 's':
   case 'a':
     scal = true;
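For 'r', ModType widens the element type one step and then falls through into the existing scalar handling shared by 's' and 'a'. A standalone sketch of that widening (a hypothetical helper mirroring the emitter's Widen, inferred from the test return types; not the emitter code itself):

    #include <cassert>

    // Hypothetical mirror of NeonEmitter's type-code widening for 'r':
    // 'c' (int8) -> 's' (int16) -> 'i' (int32) -> 'l' (int64).
    static char WidenTypeCode(char type) {
      switch (type) {
      case 'c': return 's';
      case 's': return 'i';
      case 'i': return 'l';
      default:  return type; // other codes left untouched in this sketch
      }
    }

    int main() {
      assert(WidenTypeCode('c') == 's'); // vaddlv_s8 returns int16_t
      assert(WidenTypeCode('i') == 'l'); // vaddlvq_s32 returns int64_t
      return 0;
    }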
@@ -1878,6 +1880,12 @@ static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
   return Flags.getFlags();
 }
 
+static bool ProtoHasScalar(const std::string proto)
+{
+  return (proto.find('s') != std::string::npos
+          || proto.find('r') != std::string::npos);
+}
+
 // Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a)
 static std::string GenBuiltin(const std::string &name, const std::string &proto,
                               StringRef typestr, ClassKind ck) {
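ProtoHasScalar treats both 's' and the new 'r' modifier as scalar-bearing prototypes, so builtins such as vaddlv (prototype "rd") are kept out of the type-overloaded ClassB handling just like the existing scalar-result builtins, as the hunks below show. A standalone sketch of the helper's effect (prototype strings taken from the definitions above):

    #include <cassert>
    #include <string>

    static bool ProtoHasScalar(const std::string proto)
    {
      return (proto.find('s') != std::string::npos
              || proto.find('r') != std::string::npos);
    }

    int main() {
      assert(ProtoHasScalar("sd"));   // vmaxv: scalar result
      assert(ProtoHasScalar("rd"));   // vaddlv: widened scalar result
      assert(!ProtoHasScalar("dd")); // vector -> vector stays overloadable
      return 0;
    }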
@@ -1892,7 +1900,7 @@ static std::string GenBuiltin(const std::string &name, const std::string &proto,
   // Check if the prototype has a scalar operand with the type of the vector
   // elements. If not, bitcasting the args will take care of arg checking.
   // The actual signedness etc. will be taken care of with special enums.
-  if (proto.find('s') == std::string::npos)
+  if (!ProtoHasScalar(proto))
     ck = ClassB;
 
   if (proto[0] != 'v') {
@@ -2003,7 +2011,7 @@ static std::string GenBuiltinDef(const std::string &name,
   // If all types are the same size, bitcasting the args will take care
   // of arg checking. The actual signedness etc. will be taken care of with
   // special enums.
-  if (proto.find('s') == std::string::npos)
+  if (!ProtoHasScalar(proto))
     ck = ClassB;
 
   s += MangleName(name, typestr, ck);
@@ -2413,7 +2421,7 @@ NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
   else
     PrintFatalError(R->getLoc(),
                     "Fixed point convert name should contains \"32\" or \"64\"");
-  } else if (Proto.find('s') == std::string::npos) {
+  } else if (!ProtoHasScalar(Proto)) {
     // Builtins which are overloaded by type will need to have their upper
     // bound computed at Sema time based on the type constant.
     ck = ClassB;
@@ -2510,7 +2518,7 @@ NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
 
   // Functions which have a scalar argument cannot be overloaded, no need to
   // check them if we are emitting the type checking code.
-  if (Proto.find('s') != std::string::npos)
+  if (ProtoHasScalar(Proto))
     continue;
 
   SmallVector<StringRef, 16> TypeVec;