ARM64: enable aarch64-neon-intrinsics.c test

This adds support for the various NEON intrinsics used by
aarch64-neon-intrinsics.c (originally written for AArch64) and enables the
test.

My implementations are designed to be semantically correct; the actual code
quality looks like it's a wash between the two backends, and the generated
code is frequently different (hence the large number of CHECK changes).
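
For context, the enabled test follows the usual pattern for these files: each
intrinsic from <arm_neon.h> is called once and FileCheck matches the
instruction the ARM64 backend emits for it. A minimal sketch of such a check
(illustrative only, not copied from the commit) looks like this:

#include <arm_neon.h>

// CHECK-LABEL: test_vabds_f32
// CHECK: fabd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
float32_t test_vabds_f32(float32_t a, float32_t b) {
  // vabds_f32 is mapped to arm64_sisd_fabd below, which should select to a
  // single scalar fabd.
  return vabds_f32(a, b);
}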

llvm-svn: 205210
Tim Northover 2014-03-31 15:47:09 +00:00
parent 5081cd0f81
commit 0c68faa455
2 changed files with 927 additions and 1104 deletions


@@ -2541,6 +2541,7 @@ static NeonIntrinsicInfo ARM64SIMDIntrinsicMap[] = {
static NeonIntrinsicInfo ARM64SISDIntrinsicMap[] = {
NEONMAP1(vabdd_f64, arm64_sisd_fabd, Add1ArgType),
NEONMAP1(vabds_f32, arm64_sisd_fabd, Add1ArgType),
NEONMAP1(vabsd_s64, arm64_neon_abs, Add1ArgType),
NEONMAP1(vaddlv_s32, arm64_neon_saddlv, AddRetType | Add1ArgType),
NEONMAP1(vaddlv_u32, arm64_neon_uaddlv, AddRetType | Add1ArgType),
NEONMAP1(vaddlvq_s32, arm64_neon_saddlv, AddRetType | Add1ArgType),
@@ -2609,6 +2610,16 @@ static NeonIntrinsicInfo ARM64SISDIntrinsicMap[] = {
NEONMAP1(vminvq_u32, arm64_neon_uminv, AddRetType | Add1ArgType),
NEONMAP1(vmulxd_f64, arm64_neon_fmulx, Add1ArgType),
NEONMAP1(vmulxs_f32, arm64_neon_fmulx, Add1ArgType),
NEONMAP1(vpaddd_s64, arm64_neon_uaddv, AddRetType | Add1ArgType),
NEONMAP1(vpaddd_u64, arm64_neon_uaddv, AddRetType | Add1ArgType),
NEONMAP1(vpmaxnmqd_f64, arm64_neon_fmaxnmv, AddRetType | Add1ArgType),
NEONMAP1(vpmaxnms_f32, arm64_neon_fmaxnmv, AddRetType | Add1ArgType),
NEONMAP1(vpmaxqd_f64, arm64_neon_fmaxv, AddRetType | Add1ArgType),
NEONMAP1(vpmaxs_f32, arm64_neon_fmaxv, AddRetType | Add1ArgType),
NEONMAP1(vpminnmqd_f64, arm64_neon_fminnmv, AddRetType | Add1ArgType),
NEONMAP1(vpminnms_f32, arm64_neon_fminnmv, AddRetType | Add1ArgType),
NEONMAP1(vpminqd_f64, arm64_neon_fminv, AddRetType | Add1ArgType),
NEONMAP1(vpmins_f32, arm64_neon_fminv, AddRetType | Add1ArgType),
NEONMAP1(vqabsb_s8, arm64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vqabsd_s64, arm64_neon_sqabs, Add1ArgType),
NEONMAP1(vqabsh_s16, arm64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
@@ -2691,8 +2702,14 @@ static NeonIntrinsicInfo ARM64SISDIntrinsicMap[] = {
NEONMAP1(vqsubh_u16, arm64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vqsubs_s32, arm64_neon_sqsub, Add1ArgType),
NEONMAP1(vqsubs_u32, arm64_neon_uqsub, Add1ArgType),
NEONMAP1(vrecped_f64, arm64_neon_frecpe, Add1ArgType),
NEONMAP1(vrecpes_f32, arm64_neon_frecpe, Add1ArgType),
NEONMAP1(vrecpxd_f64, arm64_neon_frecpx, Add1ArgType),
NEONMAP1(vrecpxs_f32, arm64_neon_frecpx, Add1ArgType),
NEONMAP1(vrshld_s64, arm64_neon_srshl, Add1ArgType),
NEONMAP1(vrshld_u64, arm64_neon_urshl, Add1ArgType),
NEONMAP1(vrsqrted_f64, arm64_neon_frsqrte, Add1ArgType),
NEONMAP1(vrsqrtes_f32, arm64_neon_frsqrte, Add1ArgType),
NEONMAP1(vrsqrtsd_f64, arm64_neon_frsqrts, Add1ArgType),
NEONMAP1(vrsqrtss_f32, arm64_neon_frsqrts, Add1ArgType),
NEONMAP1(vsha1cq_u32, arm64_crypto_sha1c, 0),
@@ -2703,8 +2720,16 @@ static NeonIntrinsicInfo ARM64SISDIntrinsicMap[] = {
NEONMAP1(vshld_u64, arm64_neon_ushl, Add1ArgType),
NEONMAP1(vslid_n_s64, arm64_neon_vsli, Vectorize1ArgType),
NEONMAP1(vslid_n_u64, arm64_neon_vsli, Vectorize1ArgType),
NEONMAP1(vsqaddb_u8, arm64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vsqaddd_u64, arm64_neon_usqadd, Add1ArgType),
NEONMAP1(vsqaddh_u16, arm64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vsqadds_u32, arm64_neon_usqadd, Add1ArgType),
NEONMAP1(vsrid_n_s64, arm64_neon_vsri, Vectorize1ArgType),
NEONMAP1(vsrid_n_u64, arm64_neon_vsri, Vectorize1ArgType),
NEONMAP1(vuqaddb_s8, arm64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vuqaddd_s64, arm64_neon_suqadd, Add1ArgType),
NEONMAP1(vuqaddh_s16, arm64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vuqadds_s32, arm64_neon_suqadd, Add1ArgType),
};
#undef NEONMAP0
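
Each NEONMAP1 entry above ties a scalar builtin to an ARM64 LLVM intrinsic
plus type-modifier flags controlling how the overloaded intrinsic is
instantiated. As a rough illustration (an assumed lowering, not text from the
commit): the new vpaddd_s64 entry carries AddRetType | Add1ArgType, so
llvm.arm64.neon.uaddv is overloaded on both the i64 result and the <2 x i64>
argument, and a call like the following should collapse to one pairwise add:

#include <arm_neon.h>

// CHECK-LABEL: test_vpaddd_s64
// CHECK: addp {{d[0-9]+}}, {{v[0-9]+}}.2d
int64_t test_vpaddd_s64(int64x2_t a) {
  // Adds the two 64-bit lanes of the input vector.
  return vpaddd_s64(a);
}
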
@@ -2783,10 +2808,31 @@ static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
const NeonIntrinsicInfo &SISDInfo,
SmallVectorImpl<Value *> &Ops,
const CallExpr *E) {
unsigned BuiltinID = SISDInfo.BuiltinID;
unsigned int Int = SISDInfo.LLVMIntrinsic;
unsigned Modifier = SISDInfo.TypeModifier;
const char *s = SISDInfo.NameHint;
switch (BuiltinID) {
case NEON::BI__builtin_neon_vcled_s64:
case NEON::BI__builtin_neon_vcled_u64:
case NEON::BI__builtin_neon_vcles_f32:
case NEON::BI__builtin_neon_vcled_f64:
case NEON::BI__builtin_neon_vcltd_s64:
case NEON::BI__builtin_neon_vcltd_u64:
case NEON::BI__builtin_neon_vclts_f32:
case NEON::BI__builtin_neon_vcltd_f64:
case NEON::BI__builtin_neon_vcales_f32:
case NEON::BI__builtin_neon_vcaled_f64:
case NEON::BI__builtin_neon_vcalts_f32:
case NEON::BI__builtin_neon_vcaltd_f64:
// Only one direction of comparisons actually exist, cmle is actually a cmge
// with swapped operands. The table gives us the right intrinsic but we
// still need to do the swap.
std::swap(Ops[0], Ops[1]);
break;
}
assert(Int && "Generic code assumes a valid intrinsic");
// Determine the type(s) of this overloaded AArch64 intrinsic.
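
The swap exists because AArch64 only provides the "greater-than(-or-equal)"
direction of these scalar comparisons; "less-than" is obtained by reversing
the operands. For the absolute-value forms, for example, vcales_f32(a, b)
asks whether |a| <= |b| and comes out as a facge with a and b exchanged. A
sketch of the expected behaviour (illustrative, not taken from the commit's
test changes):

#include <arm_neon.h>

// CHECK-LABEL: test_vcales_f32
// CHECK: facge {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
uint32_t test_vcales_f32(float32_t a, float32_t b) {
  // |a| <= |b|, same result as vcages_f32(b, a); returns all-ones or zero.
  return vcales_f32(a, b);
}
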
@@ -2955,23 +3001,6 @@ static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
case NEON::BI__builtin_neon_vsetq_lane_f64:
return CGF.EmitARMBuiltinExpr(NEON::BI__builtin_neon_vset_lane_i8, E);
case NEON::BI__builtin_neon_vcled_s64:
case NEON::BI__builtin_neon_vcled_u64:
case NEON::BI__builtin_neon_vcles_f32:
case NEON::BI__builtin_neon_vcled_f64:
case NEON::BI__builtin_neon_vcltd_s64:
case NEON::BI__builtin_neon_vcltd_u64:
case NEON::BI__builtin_neon_vclts_f32:
case NEON::BI__builtin_neon_vcltd_f64:
case NEON::BI__builtin_neon_vcales_f32:
case NEON::BI__builtin_neon_vcaled_f64:
case NEON::BI__builtin_neon_vcalts_f32:
case NEON::BI__builtin_neon_vcaltd_f64:
// Only one direction of comparisons actually exist, cmle is actually a cmge
// with swapped operands. The table gives us the right intrinsic but we
// still need to do the swap.
std::swap(Ops[0], Ops[1]);
break;
case NEON::BI__builtin_neon_vceqzd_s64:
case NEON::BI__builtin_neon_vceqzd_u64:
case NEON::BI__builtin_neon_vcgezd_s64:
@@ -5338,12 +5367,11 @@ Value *CodeGenFunction::EmitARM64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
}
llvm::Type *Ty = llvm::Type::getInt64Ty(getLLVMContext());
Ops.push_back(EmitScalarExpr(E->getArg(1)));
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
return Builder.CreateSExt(Ops[0], Ty, "vceqd");
return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
}
case NEON::BI__builtin_neon_vtstd_s64:
case NEON::BI__builtin_neon_vtstd_u64: {
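
In the hunk above, the scalar compares are emitted as a plain IR icmp whose
i1 result is sign-extended back to i64, so the intrinsic returns 0 or
all-ones. Roughly (an illustration, not text from the commit):

#include <arm_neon.h>

// CHECK-LABEL: test_vcled_s64
// Expected IR shape: %c = icmp sle i64 %a, %b ; %r = sext i1 %c to i64
uint64_t test_vcled_s64(int64_t a, int64_t b) {
  // Result is 0 when a > b, all-ones when a <= b.
  return vcled_s64(a, b);
}
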
@@ -5518,13 +5546,11 @@ Value *CodeGenFunction::EmitARM64BuiltinExpr(unsigned BuiltinID,
unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
? Intrinsic::arm64_neon_urshl
: Intrinsic::arm64_neon_srshl;
llvm::Type *VTy = llvm::VectorType::get(Int64Ty, 1);
SmallVector<Value *, 2> ShiftOps;
ShiftOps.push_back(Ops[1]);
ShiftOps.push_back(EmitScalarExpr(E->getArg(2)));
Ops[1] =
EmitNeonCall(CGM.getIntrinsic(Int, VTy), ShiftOps, "vrshr_n", 1, true);
return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[0], Int64Ty));
Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Int64Ty), Ops[1],
Builder.CreateSExt(Ops[2], Int64Ty));
return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
}
case NEON::BI__builtin_neon_vshld_n_s64:
case NEON::BI__builtin_neon_vshld_n_u64: {
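
The rewritten vrsrad_n path above also appears to fix a real bug: the old
code built a v1i64 vector call and then added Ops[0] to a bitcast of itself,
dropping the shift result. The new code negates the immediate, feeds it to
the scalar rounding-shift-left intrinsic (a rounding left shift by -n is a
rounding right shift by n), and adds that to the accumulator. A sketch of the
intended behaviour (illustrative, not from the commit):

#include <arm_neon.h>

// CHECK-LABEL: test_vrsrad_n_s64
// CHECK: srsra {{d[0-9]+}}, {{d[0-9]+}}, #3
int64_t test_vrsrad_n_s64(int64_t a, int64_t b) {
  // a + rounding_shift_right(b, 3), implemented as a + srshl(b, -3).
  return vrsrad_n_s64(a, b, 3);
}
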
@@ -6033,6 +6059,8 @@ Value *CodeGenFunction::EmitARM64BuiltinExpr(unsigned BuiltinID,
Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
return Builder.CreateBitCast(Result, Ty);
}
case NEON::BI__builtin_neon_vnegd_s64:
return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
case NEON::BI__builtin_neon_vpmaxnm_v:
case NEON::BI__builtin_neon_vpmaxnmq_v: {
Int = Intrinsic::arm64_neon_fmaxnmp;

File diff suppressed because it is too large.