ARM & AArch64: another block of miscellaneous NEON sharing.

llvm-svn: 200527
This commit is contained in:
Tim Northover 2014-01-31 10:46:49 +00:00
parent 027b4ee607
commit ba1e344d90
1 changed files with 83 additions and 119 deletions

View File

@ -1803,6 +1803,37 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vbslq_v:
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vbsl, Ty),
Ops, "vbsl");
case NEON::BI__builtin_neon_vcls_v:
case NEON::BI__builtin_neon_vclsq_v: {
Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcls, Ty);
return EmitNeonCall(F, Ops, "vcls");
}
case NEON::BI__builtin_neon_vclz_v:
case NEON::BI__builtin_neon_vclzq_v: {
// Generate target-independent intrinsic; also need to add second argument
// for whether or not clz of zero is undefined; on ARM it isn't.
Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ty);
Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
return EmitNeonCall(F, Ops, "vclz");
}
case NEON::BI__builtin_neon_vcnt_v:
case NEON::BI__builtin_neon_vcntq_v: {
// generate target-independent intrinsic
Function *F = CGM.getIntrinsic(Intrinsic::ctpop, Ty);
return EmitNeonCall(F, Ops, "vctpop");
}
case NEON::BI__builtin_neon_vcvt_f16_v: {
assert(Type.getEltType() == NeonTypeFlags::Float16 && !Quad &&
"unexpected vcvt_f16_v builtin");
Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvtfp2hf);
return EmitNeonCall(F, Ops, "vcvt");
}
case NEON::BI__builtin_neon_vcvt_f32_f16: {
assert(Type.getEltType() == NeonTypeFlags::Float16 && !Quad &&
"unexpected vcvt_f32_f16 builtin");
Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvthf2fp);
return EmitNeonCall(F, Ops, "vcvt");
}
case NEON::BI__builtin_neon_vcvt_n_f32_v:
case NEON::BI__builtin_neon_vcvtq_n_f32_v: {
llvm::Type *FloatTy =
@ -1825,6 +1856,16 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(unsigned BuiltinID,
Function *F = CGM.getIntrinsic(Int, Tys);
return EmitNeonCall(F, Ops, "vcvt_n");
}
case NEON::BI__builtin_neon_vcvt_s32_v:
case NEON::BI__builtin_neon_vcvt_u32_v:
case NEON::BI__builtin_neon_vcvtq_s32_v:
case NEON::BI__builtin_neon_vcvtq_u32_v: {
llvm::Type *FloatTy =
GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
return usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
: Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
}
case NEON::BI__builtin_neon_vext_v:
case NEON::BI__builtin_neon_vextq_v: {
int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
@ -1946,6 +1987,11 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(unsigned BuiltinID,
return Builder.CreateZExt(Ops[0], Ty, "vmovl");
return Builder.CreateSExt(Ops[0], Ty, "vmovl");
}
case NEON::BI__builtin_neon_vmovn_v: {
llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
}
case NEON::BI__builtin_neon_vmul_v:
case NEON::BI__builtin_neon_vmulq_v:
assert(Type.isPoly() && "vmul builtin only supported for polynomial types");
@ -1960,19 +2006,56 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(unsigned BuiltinID,
Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
case NEON::BI__builtin_neon_vpadal_v:
case NEON::BI__builtin_neon_vpadalq_v: {
Int = Usgn ? Intrinsic::arm_neon_vpadalu : Intrinsic::arm_neon_vpadals;
// The source operand type has twice as many elements of half the size.
unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
llvm::Type *EltTy =
llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
llvm::Type *NarrowTy =
llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
llvm::Type *Tys[2] = { Ty, NarrowTy };
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpadal");
}
case NEON::BI__builtin_neon_vpadd_v:
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vpadd, Ty),
Ops, "vpadd");
case NEON::BI__builtin_neon_vpaddl_v:
case NEON::BI__builtin_neon_vpaddlq_v: {
Int = Usgn ? Intrinsic::arm_neon_vpaddlu : Intrinsic::arm_neon_vpaddls;
// The source operand type has twice as many elements of half the size.
unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
llvm::Type *NarrowTy =
llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
llvm::Type *Tys[2] = { Ty, NarrowTy };
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
}
case NEON::BI__builtin_neon_vpmax_v:
Int = Usgn ? Intrinsic::arm_neon_vpmaxu : Intrinsic::arm_neon_vpmaxs;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
case NEON::BI__builtin_neon_vpmin_v:
Int = Usgn ? Intrinsic::arm_neon_vpminu : Intrinsic::arm_neon_vpmins;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
case NEON::BI__builtin_neon_vqabs_v:
case NEON::BI__builtin_neon_vqabsq_v:
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqabs, Ty),
Ops, "vqabs");
case NEON::BI__builtin_neon_vqadd_v:
case NEON::BI__builtin_neon_vqaddq_v:
Int = Usgn ? Intrinsic::arm_neon_vqaddu : Intrinsic::arm_neon_vqadds;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqadd");
case NEON::BI__builtin_neon_vqmovn_v:
Int = Usgn ? Intrinsic::arm_neon_vqmovnu : Intrinsic::arm_neon_vqmovns;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqmovn");
case NEON::BI__builtin_neon_vqmovun_v:
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqmovnsu, Ty),
Ops, "vqdmull");
case NEON::BI__builtin_neon_vqneg_v:
case NEON::BI__builtin_neon_vqnegq_v:
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqneg, Ty),
Ops, "vqneg");
case NEON::BI__builtin_neon_vqsub_v:
case NEON::BI__builtin_neon_vqsubq_v:
Int = Usgn ? Intrinsic::arm_neon_vqsubu : Intrinsic::arm_neon_vqsubs;
@ -3919,18 +4002,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Int = Intrinsic::aarch64_neon_vmulx;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
}
case NEON::BI__builtin_neon_vpaddl_v:
case NEON::BI__builtin_neon_vpaddlq_v:
return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vpaddl_v, E);
case NEON::BI__builtin_neon_vpadal_v:
case NEON::BI__builtin_neon_vpadalq_v:
return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vpadal_v, E);
case NEON::BI__builtin_neon_vqabs_v:
case NEON::BI__builtin_neon_vqabsq_v:
return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vqabs_v, E);
case NEON::BI__builtin_neon_vqneg_v:
case NEON::BI__builtin_neon_vqnegq_v:
return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vqneg_v, E);
case NEON::BI__builtin_neon_vabs_v:
case NEON::BI__builtin_neon_vabsq_v: {
if (VTy->getElementType()->isFloatingPointTy()) {
@ -3948,29 +4019,10 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Int = Intrinsic::aarch64_neon_suqadd;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
}
case NEON::BI__builtin_neon_vcls_v:
case NEON::BI__builtin_neon_vclsq_v:
return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vcls_v, E);
case NEON::BI__builtin_neon_vclz_v:
case NEON::BI__builtin_neon_vclzq_v:
return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vclz_v, E);
case NEON::BI__builtin_neon_vcnt_v:
case NEON::BI__builtin_neon_vcntq_v:
return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vcnt_v, E);
case NEON::BI__builtin_neon_vrbit_v:
case NEON::BI__builtin_neon_vrbitq_v:
Int = Intrinsic::aarch64_neon_rbit;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
case NEON::BI__builtin_neon_vmovn_v:
return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vmovn_v, E);
case NEON::BI__builtin_neon_vqmovun_v:
return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vqmovun_v, E);
case NEON::BI__builtin_neon_vqmovn_v:
return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vqmovn_v, E);
case NEON::BI__builtin_neon_vcvt_f16_v:
return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vcvt_f16_v, E);
case NEON::BI__builtin_neon_vcvt_f32_f16:
return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vcvt_f32_f16, E);
case NEON::BI__builtin_neon_vcvt_f32_f64: {
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, false));
@ -4031,11 +4083,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Int = Intrinsic::nearbyint;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi");
}
case NEON::BI__builtin_neon_vcvt_s32_v:
case NEON::BI__builtin_neon_vcvt_u32_v:
case NEON::BI__builtin_neon_vcvtq_s32_v:
case NEON::BI__builtin_neon_vcvtq_u32_v:
return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vcvtq_u32_v, E);
case NEON::BI__builtin_neon_vcvt_s64_v:
case NEON::BI__builtin_neon_vcvt_u64_v:
case NEON::BI__builtin_neon_vcvtq_s64_v:
@ -4507,53 +4554,12 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtq);
return EmitNeonCall(F, Ops, "vcagt");
}
case NEON::BI__builtin_neon_vcls_v:
case NEON::BI__builtin_neon_vclsq_v: {
Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcls, Ty);
return EmitNeonCall(F, Ops, "vcls");
}
case NEON::BI__builtin_neon_vclz_v:
case NEON::BI__builtin_neon_vclzq_v: {
// Generate target-independent intrinsic; also need to add second argument
// for whether or not clz of zero is undefined; on ARM it isn't.
Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ty);
Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
return EmitNeonCall(F, Ops, "vclz");
}
case NEON::BI__builtin_neon_vcnt_v:
case NEON::BI__builtin_neon_vcntq_v: {
// generate target-independent intrinsic
Function *F = CGM.getIntrinsic(Intrinsic::ctpop, Ty);
return EmitNeonCall(F, Ops, "vctpop");
}
case NEON::BI__builtin_neon_vcvt_f16_v: {
assert(Type.getEltType() == NeonTypeFlags::Float16 && !quad &&
"unexpected vcvt_f16_v builtin");
Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvtfp2hf);
return EmitNeonCall(F, Ops, "vcvt");
}
case NEON::BI__builtin_neon_vcvt_f32_f16: {
assert(Type.getEltType() == NeonTypeFlags::Float16 && !quad &&
"unexpected vcvt_f32_f16 builtin");
Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvthf2fp);
return EmitNeonCall(F, Ops, "vcvt");
}
case NEON::BI__builtin_neon_vcvt_f32_v:
case NEON::BI__builtin_neon_vcvtq_f32_v:
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
: Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
case NEON::BI__builtin_neon_vcvt_s32_v:
case NEON::BI__builtin_neon_vcvt_u32_v:
case NEON::BI__builtin_neon_vcvtq_s32_v:
case NEON::BI__builtin_neon_vcvtq_u32_v: {
llvm::Type *FloatTy =
GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
return usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
: Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
}
case NEON::BI__builtin_neon_vld1q_lane_v:
// Handle 64-bit integer elements as a special case. Use shuffles of
// one-element vectors to avoid poor code for i64 in the backend.
@ -4642,48 +4648,6 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
return Builder.CreateStore(Ops[1], Ops[0]);
}
case NEON::BI__builtin_neon_vmovn_v: {
llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
}
case NEON::BI__builtin_neon_vpadal_v:
case NEON::BI__builtin_neon_vpadalq_v: {
Int = usgn ? Intrinsic::arm_neon_vpadalu : Intrinsic::arm_neon_vpadals;
// The source operand type has twice as many elements of half the size.
unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
llvm::Type *EltTy =
llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
llvm::Type *NarrowTy =
llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
llvm::Type *Tys[2] = { Ty, NarrowTy };
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpadal");
}
case NEON::BI__builtin_neon_vpaddl_v:
case NEON::BI__builtin_neon_vpaddlq_v: {
Int = usgn ? Intrinsic::arm_neon_vpaddlu : Intrinsic::arm_neon_vpaddls;
// The source operand type has twice as many elements of half the size.
unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
llvm::Type *NarrowTy =
llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
llvm::Type *Tys[2] = { Ty, NarrowTy };
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
}
case NEON::BI__builtin_neon_vqabs_v:
case NEON::BI__builtin_neon_vqabsq_v:
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqabs, Ty),
Ops, "vqabs");
case NEON::BI__builtin_neon_vqmovn_v:
Int = usgn ? Intrinsic::arm_neon_vqmovnu : Intrinsic::arm_neon_vqmovns;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqmovn");
case NEON::BI__builtin_neon_vqmovun_v:
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqmovnsu, Ty),
Ops, "vqdmull");
case NEON::BI__builtin_neon_vqneg_v:
case NEON::BI__builtin_neon_vqnegq_v:
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqneg, Ty),
Ops, "vqneg");
case NEON::BI__builtin_neon_vqrshrn_n_v:
Int =
usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;