ARM64: enable aarch64-neon-intrinsics.c test
This adds support for the various NEON intrinsics used by aarch64-neon-intrinsics.c (originally written for AArch64) and enables the test. My implementations are designed to be semantically correct; the actual code quality looks like it's a wash between the two backends, and is frequently different (hence the large number of CHECK changes).
llvm-svn: 205210
commit 0c68faa455 (parent 5081cd0f81)
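Editorial note: the intrinsics being wired up are the AArch64 scalar ACLE ones, reached from user code via arm_neon.h. A usage sketch (hypothetical function name, not code from the commit; assumes an arm64 target where Clang, after this change, selects the scalar instruction):

    #include <arm_neon.h>

    // vabsd_s64 is one of the newly mapped scalar intrinsics; this is a
    // usage illustration only.
    int64_t abs_d(int64_t x) { return vabsd_s64(x); }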
@@ -2541,6 +2541,7 @@ static NeonIntrinsicInfo ARM64SIMDIntrinsicMap[] = {
 static NeonIntrinsicInfo ARM64SISDIntrinsicMap[] = {
   NEONMAP1(vabdd_f64, arm64_sisd_fabd, Add1ArgType),
   NEONMAP1(vabds_f32, arm64_sisd_fabd, Add1ArgType),
+  NEONMAP1(vabsd_s64, arm64_neon_abs, Add1ArgType),
   NEONMAP1(vaddlv_s32, arm64_neon_saddlv, AddRetType | Add1ArgType),
   NEONMAP1(vaddlv_u32, arm64_neon_uaddlv, AddRetType | Add1ArgType),
   NEONMAP1(vaddlvq_s32, arm64_neon_saddlv, AddRetType | Add1ArgType),
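For readers outside CGBuiltin.cpp: NEONMAP1 is a local macro that builds a NeonIntrinsicInfo table entry tying a Clang builtin ID to the LLVM intrinsic implementing it, plus type-modifier flags. A minimal sketch of the idea (field names and order are assumptions, not copied from the commit):

    // Sketch only: the real NeonIntrinsicInfo and NEONMAP1 live in
    // clang/lib/CodeGen/CGBuiltin.cpp; field names here are guesses.
    struct NeonIntrinsicInfo {
      unsigned BuiltinID;       // NEON::BI__builtin_neon_<name>
      unsigned LLVMIntrinsic;   // Intrinsic::<id> to emit, 0 if custom-coded
      unsigned AltLLVMIntrinsic;
      const char *NameHint;     // name given to the emitted value
      unsigned TypeModifier;    // AddRetType, Add1ArgType, Use64BitVectors...
    };

    #define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier)              \
      { NEON::BI__builtin_neon_##NameBase, Intrinsic::LLVMIntrinsic, 0,  \
        #NameBase, TypeModifier }

The table appears to be kept sorted by builtin ID so the emitter can look entries up with a binary search.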
@@ -2609,6 +2610,16 @@ static NeonIntrinsicInfo ARM64SISDIntrinsicMap[] = {
   NEONMAP1(vminvq_u32, arm64_neon_uminv, AddRetType | Add1ArgType),
   NEONMAP1(vmulxd_f64, arm64_neon_fmulx, Add1ArgType),
   NEONMAP1(vmulxs_f32, arm64_neon_fmulx, Add1ArgType),
+  NEONMAP1(vpaddd_s64, arm64_neon_uaddv, AddRetType | Add1ArgType),
+  NEONMAP1(vpaddd_u64, arm64_neon_uaddv, AddRetType | Add1ArgType),
+  NEONMAP1(vpmaxnmqd_f64, arm64_neon_fmaxnmv, AddRetType | Add1ArgType),
+  NEONMAP1(vpmaxnms_f32, arm64_neon_fmaxnmv, AddRetType | Add1ArgType),
+  NEONMAP1(vpmaxqd_f64, arm64_neon_fmaxv, AddRetType | Add1ArgType),
+  NEONMAP1(vpmaxs_f32, arm64_neon_fmaxv, AddRetType | Add1ArgType),
+  NEONMAP1(vpminnmqd_f64, arm64_neon_fminnmv, AddRetType | Add1ArgType),
+  NEONMAP1(vpminnms_f32, arm64_neon_fminnmv, AddRetType | Add1ArgType),
+  NEONMAP1(vpminqd_f64, arm64_neon_fminv, AddRetType | Add1ArgType),
+  NEONMAP1(vpmins_f32, arm64_neon_fminv, AddRetType | Add1ArgType),
   NEONMAP1(vqabsb_s8, arm64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
   NEONMAP1(vqabsd_s64, arm64_neon_sqabs, Add1ArgType),
   NEONMAP1(vqabsh_s16, arm64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
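The modifier pairs visible in this hunk encode how a scalar builtin reaches a vector-only instruction: vqabsd_s64 can use the 64-bit scalar form directly (Add1ArgType), while the byte and halfword variants have no scalar encoding, so Vectorize1ArgType | Use64BitVectors routes them through a 64-bit vector and extracts the result lane. As a reference model of the saturating-abs semantics being mapped (a sketch, not code from the commit):

    #include <cstdint>

    // Reference semantics of vqabsb_s8 (SQABS on a byte lane): plain
    // absolute value, except INT8_MIN saturates to INT8_MAX instead of
    // wrapping back to itself.
    static inline int8_t vqabsb_s8_ref(int8_t a) {
      if (a == INT8_MIN)
        return INT8_MAX;
      return a < 0 ? (int8_t)-a : a;
    }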
@@ -2691,8 +2702,14 @@ static NeonIntrinsicInfo ARM64SISDIntrinsicMap[] = {
   NEONMAP1(vqsubh_u16, arm64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
   NEONMAP1(vqsubs_s32, arm64_neon_sqsub, Add1ArgType),
   NEONMAP1(vqsubs_u32, arm64_neon_uqsub, Add1ArgType),
+  NEONMAP1(vrecped_f64, arm64_neon_frecpe, Add1ArgType),
+  NEONMAP1(vrecpes_f32, arm64_neon_frecpe, Add1ArgType),
+  NEONMAP1(vrecpxd_f64, arm64_neon_frecpx, Add1ArgType),
+  NEONMAP1(vrecpxs_f32, arm64_neon_frecpx, Add1ArgType),
   NEONMAP1(vrshld_s64, arm64_neon_srshl, Add1ArgType),
   NEONMAP1(vrshld_u64, arm64_neon_urshl, Add1ArgType),
+  NEONMAP1(vrsqrted_f64, arm64_neon_frsqrte, Add1ArgType),
+  NEONMAP1(vrsqrtes_f32, arm64_neon_frsqrte, Add1ArgType),
   NEONMAP1(vrsqrtsd_f64, arm64_neon_frsqrts, Add1ArgType),
   NEONMAP1(vrsqrtss_f32, arm64_neon_frsqrts, Add1ArgType),
   NEONMAP1(vsha1cq_u32, arm64_crypto_sha1c, 0),
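Among these new mappings, vrecps/vrsqrts are the Newton-Raphson step instructions that pair with the vrecpe/vrsqrte estimates. As a reference for what frecps computes (per the architectural definition; this sketch is not part of the commit):

    // FRECPS computes 2 - a*x. If x approximates 1/a, then x * (2 - a*x)
    // is a refined approximation, so chaining vrecpe -> vrecps converges
    // on the reciprocal without a hardware divide.
    static inline double vrecpsd_f64_ref(double a, double x) {
      return 2.0 - a * x;
    }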
@@ -2703,8 +2720,16 @@ static NeonIntrinsicInfo ARM64SISDIntrinsicMap[] = {
   NEONMAP1(vshld_u64, arm64_neon_ushl, Add1ArgType),
   NEONMAP1(vslid_n_s64, arm64_neon_vsli, Vectorize1ArgType),
   NEONMAP1(vslid_n_u64, arm64_neon_vsli, Vectorize1ArgType),
+  NEONMAP1(vsqaddb_u8, arm64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
+  NEONMAP1(vsqaddd_u64, arm64_neon_usqadd, Add1ArgType),
+  NEONMAP1(vsqaddh_u16, arm64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
+  NEONMAP1(vsqadds_u32, arm64_neon_usqadd, Add1ArgType),
   NEONMAP1(vsrid_n_s64, arm64_neon_vsri, Vectorize1ArgType),
   NEONMAP1(vsrid_n_u64, arm64_neon_vsri, Vectorize1ArgType),
+  NEONMAP1(vuqaddb_s8, arm64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
+  NEONMAP1(vuqaddd_s64, arm64_neon_suqadd, Add1ArgType),
+  NEONMAP1(vuqaddh_s16, arm64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
+  NEONMAP1(vuqadds_s32, arm64_neon_suqadd, Add1ArgType),
 };
 
 #undef NEONMAP0
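The usqadd/suqadd entries are the mixed-sign saturating adds: USQADD accumulates a signed value into an unsigned register with unsigned saturation, and SUQADD is the mirror image. A reference sketch of the unsigned 64-bit variant (signature assumed from the ACLE, not taken from the commit):

    #include <cstdint>

    // Reference semantics of vsqaddd_u64 (USQADD): add a *signed* value
    // into an unsigned accumulator, saturating at 0 and UINT64_MAX.
    static inline uint64_t vsqaddd_u64_ref(uint64_t a, int64_t b) {
      if (b >= 0) {
        uint64_t ub = (uint64_t)b;
        return a > UINT64_MAX - ub ? UINT64_MAX : a + ub;
      }
      uint64_t mag = (uint64_t)(-(b + 1)) + 1; // |b|, safe at INT64_MIN
      return a < mag ? 0 : a - mag;
    }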
@@ -2783,10 +2808,31 @@ static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
                                             const NeonIntrinsicInfo &SISDInfo,
                                             SmallVectorImpl<Value *> &Ops,
                                             const CallExpr *E) {
+  unsigned BuiltinID = SISDInfo.BuiltinID;
   unsigned int Int = SISDInfo.LLVMIntrinsic;
   unsigned Modifier = SISDInfo.TypeModifier;
   const char *s = SISDInfo.NameHint;
 
+  switch (BuiltinID) {
+  case NEON::BI__builtin_neon_vcled_s64:
+  case NEON::BI__builtin_neon_vcled_u64:
+  case NEON::BI__builtin_neon_vcles_f32:
+  case NEON::BI__builtin_neon_vcled_f64:
+  case NEON::BI__builtin_neon_vcltd_s64:
+  case NEON::BI__builtin_neon_vcltd_u64:
+  case NEON::BI__builtin_neon_vclts_f32:
+  case NEON::BI__builtin_neon_vcltd_f64:
+  case NEON::BI__builtin_neon_vcales_f32:
+  case NEON::BI__builtin_neon_vcaled_f64:
+  case NEON::BI__builtin_neon_vcalts_f32:
+  case NEON::BI__builtin_neon_vcaltd_f64:
+    // Only one direction of comparisons actually exist, cmle is actually a cmge
+    // with swapped operands. The table gives us the right intrinsic but we
+    // still need to do the swap.
+    std::swap(Ops[0], Ops[1]);
+    break;
+  }
+
   assert(Int && "Generic code assumes a valid intrinsic");
 
   // Determine the type(s) of this overloaded AArch64 intrinsic.
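The swap works because comparison is antisymmetric: AArch64 only provides the "greater" direction (CMGE/CMGT, and FACGE/FACGT for the absolute comparisons), and a <= b holds exactly when b >= a. A reference model of the required result (not the committed code):

    #include <cstdint>

    // vcled_s64(a, b) must yield all-ones when a <= b and all-zeros
    // otherwise; the backend emits cmge with the operands swapped.
    static inline uint64_t vcled_s64_ref(int64_t a, int64_t b) {
      return (b >= a) ? ~0ULL : 0ULL; // same truth value as a <= b
    }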
@@ -2955,23 +3001,6 @@ static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
   case NEON::BI__builtin_neon_vsetq_lane_f64:
     return CGF.EmitARMBuiltinExpr(NEON::BI__builtin_neon_vset_lane_i8, E);
 
-  case NEON::BI__builtin_neon_vcled_s64:
-  case NEON::BI__builtin_neon_vcled_u64:
-  case NEON::BI__builtin_neon_vcles_f32:
-  case NEON::BI__builtin_neon_vcled_f64:
-  case NEON::BI__builtin_neon_vcltd_s64:
-  case NEON::BI__builtin_neon_vcltd_u64:
-  case NEON::BI__builtin_neon_vclts_f32:
-  case NEON::BI__builtin_neon_vcltd_f64:
-  case NEON::BI__builtin_neon_vcales_f32:
-  case NEON::BI__builtin_neon_vcaled_f64:
-  case NEON::BI__builtin_neon_vcalts_f32:
-  case NEON::BI__builtin_neon_vcaltd_f64:
-    // Only one direction of comparisons actually exist, cmle is actually a cmge
-    // with swapped operands. The table gives us the right intrinsic but we
-    // still need to do the swap.
-    std::swap(Ops[0], Ops[1]);
-    break;
   case NEON::BI__builtin_neon_vceqzd_s64:
   case NEON::BI__builtin_neon_vceqzd_u64:
   case NEON::BI__builtin_neon_vcgezd_s64:
@@ -5338,12 +5367,11 @@ Value *CodeGenFunction::EmitARM64BuiltinExpr(unsigned BuiltinID,
     case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
     case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
     }
-    llvm::Type *Ty = llvm::Type::getInt64Ty(getLLVMContext());
     Ops.push_back(EmitScalarExpr(E->getArg(1)));
-    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
-    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
+    Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
+    Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
     Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
-    return Builder.CreateSExt(Ops[0], Ty, "vceqd");
+    return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
   }
   case NEON::BI__builtin_neon_vtstd_s64:
   case NEON::BI__builtin_neon_vtstd_u64: {
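Two things happen in this hunk: the hand-built getInt64Ty call is replaced by the CodeGenFunction's cached Int64Ty, and the ICmp/SExt pair materialises the NEON convention that scalar comparisons return all-ones or all-zeros. A reference model of the mask the sign-extension produces (a sketch, not the committed code):

    #include <cstdint>

    // Sign-extending the 1-bit compare result turns true/false into the
    // all-ones / all-zeros masks NEON scalar comparisons must return.
    static inline uint64_t cmp_mask(int64_t a, int64_t b) {
      return (uint64_t)-(int64_t)(a <= b); // -(1) -> ~0, -(0) -> 0
    }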
@@ -5518,13 +5546,11 @@ Value *CodeGenFunction::EmitARM64BuiltinExpr(unsigned BuiltinID,
     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
                        ? Intrinsic::arm64_neon_urshl
                        : Intrinsic::arm64_neon_srshl;
-    llvm::Type *VTy = llvm::VectorType::get(Int64Ty, 1);
-    SmallVector<Value *, 2> ShiftOps;
-    ShiftOps.push_back(Ops[1]);
-    ShiftOps.push_back(EmitScalarExpr(E->getArg(2)));
-    Ops[1] =
-        EmitNeonCall(CGM.getIntrinsic(Int, VTy), ShiftOps, "vrshr_n", 1, true);
-    return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[0], Int64Ty));
+    Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
+    Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
+    Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Int64Ty), Ops[1],
+                                 Builder.CreateSExt(Ops[2], Int64Ty));
+    return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
   }
   case NEON::BI__builtin_neon_vshld_n_s64:
   case NEON::BI__builtin_neon_vshld_n_u64: {
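The rewrite drops the <1 x i64> vector detour and calls the scalar i64 intrinsic directly; note the CreateNeg, since srshl/urshl shift left and a negative amount means shift right. Reference semantics for the signed variant (sketched from the instruction's definition using the __int128 extension; not the committed code):

    #include <cstdint>

    // vrsrad_n_s64(a, b, n): rounding shift right of b by n (1..64),
    // accumulated into a. Widening to 128 bits keeps the rounding-bit
    // add from overflowing; the final add wraps, as the instruction does.
    static inline int64_t vrsrad_n_s64_ref(int64_t a, int64_t b, unsigned n) {
      __int128 rounded = (__int128)b + ((__int128)1 << (n - 1));
      return (int64_t)((uint64_t)a + (uint64_t)(int64_t)(rounded >> n));
    }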
@@ -6033,6 +6059,8 @@ Value *CodeGenFunction::EmitARM64BuiltinExpr(unsigned BuiltinID,
     Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
     return Builder.CreateBitCast(Result, Ty);
   }
+  case NEON::BI__builtin_neon_vnegd_s64:
+    return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
   case NEON::BI__builtin_neon_vpmaxnm_v:
   case NEON::BI__builtin_neon_vpmaxnmq_v: {
     Int = Intrinsic::arm64_neon_fmaxnmp;
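vpmaxnm lowers to fmaxnmp, the pairwise form of FMAXNM, which follows IEEE-754 maxNum rather than propagating NaNs. A reference model (behaviourally the same as std::fmax; not the committed code):

    #include <cmath>

    // FMAXNM: if exactly one operand is NaN, the other operand wins; a
    // NaN result only occurs when both inputs are NaN.
    static inline double fmaxnm_ref(double a, double b) {
      if (std::isnan(a)) return b;
      if (std::isnan(b)) return a;
      return a > b ? a : b;
    }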
File diff suppressed because it is too large.