AArch64: refactor table-driven NEON lookup.

This extracts the table-driven intrinsic lookup phase into a separate
function, to be used by EmitCommonNeonBuiltinExpr soon.

It also simplifies the logic used in that lookup, since VectorCastArgN
and ScalarArgN were actually identical.

llvm-svn: 201847
Tim Northover 2014-02-21 11:57:20 +00:00
parent 96601cad2f
commit 2d83796860
2 changed files with 184 additions and 203 deletions
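To see the point of the new flag scheme before reading the diff: each modifier bit independently appends one entry to the intrinsic's overload-type list, so the old ScalarArgN and VectorCastArgN modifiers, which both just pushed the argument's converted type, collapse into a single Add1ArgType bit. The following standalone C++ sketch is illustrative only, not code from this commit; the overloadTypes helper is invented for the example and uses strings in place of llvm::Type so it runs outside clang.

    #include <cstdio>
    #include <string>
    #include <vector>

    // Modifier bits as introduced in this commit.
    enum {
      AddRetType = (1 << 0),
      Add1ArgType = (1 << 1),
      Add2ArgTypes = (1 << 2),
      VectorizeRetType = (1 << 3),
      VectorizeArgTypes = (1 << 4),
      InventFloatType = (1 << 5),
      VectorRet = AddRetType | VectorizeRetType,
      FpCmpzModifiers = AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
    };

    // Mirrors LookupNeonIntrinsic's type-list construction, with strings
    // standing in for llvm::Type.
    std::vector<std::string> overloadTypes(unsigned Mod, std::string RetTy,
                                           std::string ArgTy) {
      std::vector<std::string> Tys;
      if (Mod & AddRetType)
        Tys.push_back((Mod & VectorizeRetType) ? "<1 x " + RetTy + ">" : RetTy);
      if (Mod & VectorizeArgTypes)
        ArgTy = "<1 x " + ArgTy + ">";
      if (Mod & (Add1ArgType | Add2ArgTypes))
        Tys.push_back(ArgTy);
      if (Mod & Add2ArgTypes)
        Tys.push_back(ArgTy);   // fp compares overload on two argument types
      if (Mod & InventFloatType)
        Tys.push_back("float"); // invented zero operand for fp compare-to-zero
      return Tys;
    }

    int main() {
      // vceqzd_f64 uses FpCmpzModifiers: <1 x double>, double, float.
      for (const std::string &T : overloadTypes(FpCmpzModifiers, "double", "double"))
        std::printf("%s\n", T.c_str());
    }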


@@ -2448,22 +2448,25 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(unsigned BuiltinID,
   return 0;
 }
-enum NeonScalarTypeMod {
-  ScalarRet = (1 << 0),
-  VectorRet = (1 << 1),
-  ScalarArg0 = (1 << 2),
-  VectorGetArg0 = (1 << 3),
-  VectorCastArg0 = (1 << 4),
-  ScalarArg1 = (1 << 5),
-  VectorGetArg1 = (1 << 6),
-  VectorCastArg1 = (1 << 7),
-  ScalarFpCmpzArg1 = (1 << 8),
+enum {
+  AddRetType = (1 << 0),
+  Add1ArgType = (1 << 1),
+  Add2ArgTypes = (1 << 2),
-  VectorRetGetArgs01 = VectorRet | VectorGetArg0 | VectorGetArg1,
-  FpCmpzModifiers = VectorRet | ScalarArg0 | ScalarFpCmpzArg1
+  VectorizeRetType = (1 << 3),
+  VectorizeArgTypes = (1 << 4),
+  InventFloatType = (1 << 5),
+  Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
+  VectorRet = AddRetType | VectorizeRetType,
+  VectorRetGetArgs01 =
+      AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
+  FpCmpzModifiers =
+      AddRetType | VectorizeRetType | Add1ArgType | InventFloatType,
 };
 struct NeonSISDIntrinsicInfo {
   unsigned BuiltinID;
   unsigned LLVMIntrinsic;
   const char *NameHint;
@@ -2482,116 +2485,116 @@ struct NeonSISDIntrinsicInfo {
     Intrinsic::LLVMIntrinsic, #NameBase, TypeModifier }
 static const NeonSISDIntrinsicInfo AArch64SISDIntrinsicInfo[] = {
-  SISDMAP1(vabdd, f64, aarch64_neon_vabd, ScalarRet),
-  SISDMAP1(vabds, f32, aarch64_neon_vabd, ScalarRet),
+  SISDMAP1(vabdd, f64, aarch64_neon_vabd, AddRetType),
+  SISDMAP1(vabds, f32, aarch64_neon_vabd, AddRetType),
   SISDMAP1(vabsd, s64, aarch64_neon_vabs, 0),
   SISDMAP1(vaddd, s64, aarch64_neon_vaddds, 0),
   SISDMAP1(vaddd, u64, aarch64_neon_vadddu, 0),
-  SISDMAP1(vaddlv, s16, aarch64_neon_saddlv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddlv, s32, aarch64_neon_saddlv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddlv, s8, aarch64_neon_saddlv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddlv, u16, aarch64_neon_uaddlv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddlv, u32, aarch64_neon_uaddlv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddlv, u8, aarch64_neon_uaddlv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddlvq, s16, aarch64_neon_saddlv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddlvq, s32, aarch64_neon_saddlv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddlvq, s8, aarch64_neon_saddlv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddlvq, u16, aarch64_neon_uaddlv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddlvq, u32, aarch64_neon_uaddlv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddlvq, u8, aarch64_neon_uaddlv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddv, f32, aarch64_neon_vpfadd, ScalarRet | VectorCastArg0),
-  SISDMAP1(vaddv, s16, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddv, s32, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddv, s8, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddv, u16, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddv, u32, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddv, u8, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddvq, f32, aarch64_neon_vpfadd, ScalarRet | VectorCastArg0),
-  SISDMAP1(vaddvq, f64, aarch64_neon_vpfadd, ScalarRet | VectorCastArg0),
-  SISDMAP1(vaddvq, s16, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddvq, s32, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddvq, s64, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddvq, s8, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddvq, u16, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddvq, u32, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddvq, u64, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddvq, u8, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
-  SISDMAP1(vcaged, f64, aarch64_neon_fcage, VectorRet | ScalarArg0 | ScalarArg1),
-  SISDMAP1(vcages, f32, aarch64_neon_fcage, VectorRet | ScalarArg0 | ScalarArg1),
-  SISDMAP1(vcagtd, f64, aarch64_neon_fcagt, VectorRet | ScalarArg0 | ScalarArg1),
-  SISDMAP1(vcagts, f32, aarch64_neon_fcagt, VectorRet | ScalarArg0 | ScalarArg1),
-  SISDMAP1(vcaled, f64, aarch64_neon_fcage, VectorRet | ScalarArg0 | ScalarArg1),
-  SISDMAP1(vcales, f32, aarch64_neon_fcage, VectorRet | ScalarArg0 | ScalarArg1),
-  SISDMAP1(vcaltd, f64, aarch64_neon_fcagt, VectorRet | ScalarArg0 | ScalarArg1),
-  SISDMAP1(vcalts, f32, aarch64_neon_fcagt, VectorRet | ScalarArg0 | ScalarArg1),
-  SISDMAP1(vceqd, f64, aarch64_neon_fceq, VectorRet | ScalarArg0 | ScalarArg1),
+  SISDMAP1(vaddlv, s16, aarch64_neon_saddlv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddlv, s32, aarch64_neon_saddlv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddlv, s8, aarch64_neon_saddlv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddlv, u16, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddlv, u32, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddlv, u8, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddlvq, s16, aarch64_neon_saddlv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddlvq, s32, aarch64_neon_saddlv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddlvq, s8, aarch64_neon_saddlv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddlvq, u16, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddlvq, u32, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddlvq, u8, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddv, f32, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
+  SISDMAP1(vaddv, s16, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddv, s32, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddv, s8, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddv, u16, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddv, u32, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddv, u8, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddvq, f32, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
+  SISDMAP1(vaddvq, f64, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
+  SISDMAP1(vaddvq, s16, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddvq, s32, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddvq, s64, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddvq, s8, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddvq, u16, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddvq, u32, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddvq, u64, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddvq, u8, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+  SISDMAP1(vcaged, f64, aarch64_neon_fcage, VectorRet | Add2ArgTypes),
+  SISDMAP1(vcages, f32, aarch64_neon_fcage, VectorRet | Add2ArgTypes),
+  SISDMAP1(vcagtd, f64, aarch64_neon_fcagt, VectorRet | Add2ArgTypes),
+  SISDMAP1(vcagts, f32, aarch64_neon_fcagt, VectorRet | Add2ArgTypes),
+  SISDMAP1(vcaled, f64, aarch64_neon_fcage, VectorRet | Add2ArgTypes),
+  SISDMAP1(vcales, f32, aarch64_neon_fcage, VectorRet | Add2ArgTypes),
+  SISDMAP1(vcaltd, f64, aarch64_neon_fcagt, VectorRet | Add2ArgTypes),
+  SISDMAP1(vcalts, f32, aarch64_neon_fcagt, VectorRet | Add2ArgTypes),
+  SISDMAP1(vceqd, f64, aarch64_neon_fceq, VectorRet | Add2ArgTypes),
   SISDMAP1(vceqd, s64, aarch64_neon_vceq, VectorRetGetArgs01),
   SISDMAP1(vceqd, u64, aarch64_neon_vceq, VectorRetGetArgs01),
-  SISDMAP1(vceqs, f32, aarch64_neon_fceq, VectorRet | ScalarArg0 | ScalarArg1),
+  SISDMAP1(vceqs, f32, aarch64_neon_fceq, VectorRet | Add2ArgTypes),
   SISDMAP1(vceqzd, f64, aarch64_neon_fceq, FpCmpzModifiers),
   SISDMAP1(vceqzd, s64, aarch64_neon_vceq, VectorRetGetArgs01),
   SISDMAP1(vceqzd, u64, aarch64_neon_vceq, VectorRetGetArgs01),
   SISDMAP1(vceqzs, f32, aarch64_neon_fceq, FpCmpzModifiers),
-  SISDMAP1(vcged, f64, aarch64_neon_fcge, VectorRet | ScalarArg0 | ScalarArg1),
+  SISDMAP1(vcged, f64, aarch64_neon_fcge, VectorRet | Add2ArgTypes),
   SISDMAP1(vcged, s64, aarch64_neon_vcge, VectorRetGetArgs01),
   SISDMAP1(vcged, u64, aarch64_neon_vchs, VectorRetGetArgs01),
-  SISDMAP1(vcges, f32, aarch64_neon_fcge, VectorRet | ScalarArg0 | ScalarArg1),
+  SISDMAP1(vcges, f32, aarch64_neon_fcge, VectorRet | Add2ArgTypes),
   SISDMAP1(vcgezd, f64, aarch64_neon_fcge, FpCmpzModifiers),
   SISDMAP1(vcgezd, s64, aarch64_neon_vcge, VectorRetGetArgs01),
   SISDMAP1(vcgezs, f32, aarch64_neon_fcge, FpCmpzModifiers),
-  SISDMAP1(vcgtd, f64, aarch64_neon_fcgt, VectorRet | ScalarArg0 | ScalarArg1),
+  SISDMAP1(vcgtd, f64, aarch64_neon_fcgt, VectorRet | Add2ArgTypes),
   SISDMAP1(vcgtd, s64, aarch64_neon_vcgt, VectorRetGetArgs01),
   SISDMAP1(vcgtd, u64, aarch64_neon_vchi, VectorRetGetArgs01),
-  SISDMAP1(vcgts, f32, aarch64_neon_fcgt, VectorRet | ScalarArg0 | ScalarArg1),
+  SISDMAP1(vcgts, f32, aarch64_neon_fcgt, VectorRet | Add2ArgTypes),
   SISDMAP1(vcgtzd, f64, aarch64_neon_fcgt, FpCmpzModifiers),
   SISDMAP1(vcgtzd, s64, aarch64_neon_vcgt, VectorRetGetArgs01),
   SISDMAP1(vcgtzs, f32, aarch64_neon_fcgt, FpCmpzModifiers),
-  SISDMAP1(vcled, f64, aarch64_neon_fcge, VectorRet | ScalarArg0 | ScalarArg1),
+  SISDMAP1(vcled, f64, aarch64_neon_fcge, VectorRet | Add2ArgTypes),
   SISDMAP1(vcled, s64, aarch64_neon_vcge, VectorRetGetArgs01),
   SISDMAP1(vcled, u64, aarch64_neon_vchs, VectorRetGetArgs01),
-  SISDMAP1(vcles, f32, aarch64_neon_fcge, VectorRet | ScalarArg0 | ScalarArg1),
+  SISDMAP1(vcles, f32, aarch64_neon_fcge, VectorRet | Add2ArgTypes),
   SISDMAP1(vclezd, f64, aarch64_neon_fclez, FpCmpzModifiers),
   SISDMAP1(vclezd, s64, aarch64_neon_vclez, VectorRetGetArgs01),
   SISDMAP1(vclezs, f32, aarch64_neon_fclez, FpCmpzModifiers),
-  SISDMAP1(vcltd, f64, aarch64_neon_fcgt, VectorRet | ScalarArg0 | ScalarArg1),
+  SISDMAP1(vcltd, f64, aarch64_neon_fcgt, VectorRet | Add2ArgTypes),
   SISDMAP1(vcltd, s64, aarch64_neon_vcgt, VectorRetGetArgs01),
   SISDMAP1(vcltd, u64, aarch64_neon_vchi, VectorRetGetArgs01),
-  SISDMAP1(vclts, f32, aarch64_neon_fcgt, VectorRet | ScalarArg0 | ScalarArg1),
+  SISDMAP1(vclts, f32, aarch64_neon_fcgt, VectorRet | Add2ArgTypes),
   SISDMAP1(vcltzd, f64, aarch64_neon_fcltz, FpCmpzModifiers),
   SISDMAP1(vcltzd, s64, aarch64_neon_vcltz, VectorRetGetArgs01),
   SISDMAP1(vcltzs, f32, aarch64_neon_fcltz, FpCmpzModifiers),
-  SISDMAP1(vcvtad_s64, f64, aarch64_neon_fcvtas, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtad_u64, f64, aarch64_neon_fcvtau, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtas_s32, f32, aarch64_neon_fcvtas, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtas_u32, f32, aarch64_neon_fcvtau, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtd_f64, s64, aarch64_neon_vcvtint2fps, ScalarRet | VectorGetArg0),
-  SISDMAP1(vcvtd_f64, u64, aarch64_neon_vcvtint2fpu, ScalarRet | VectorGetArg0),
-  SISDMAP1(vcvtd_n_f64, s64, aarch64_neon_vcvtfxs2fp_n, ScalarRet | VectorGetArg0),
-  SISDMAP1(vcvtd_n_f64, u64, aarch64_neon_vcvtfxu2fp_n, ScalarRet | VectorGetArg0),
-  SISDMAP1(vcvtd_n_s64, f64, aarch64_neon_vcvtfp2fxs_n, VectorRet | ScalarArg0),
-  SISDMAP1(vcvtd_n_u64, f64, aarch64_neon_vcvtfp2fxu_n, VectorRet | ScalarArg0),
-  SISDMAP1(vcvtd_s64, f64, aarch64_neon_fcvtzs, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtd_u64, f64, aarch64_neon_fcvtzu, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtmd_s64, f64, aarch64_neon_fcvtms, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtmd_u64, f64, aarch64_neon_fcvtmu, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtms_s32, f32, aarch64_neon_fcvtms, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtms_u32, f32, aarch64_neon_fcvtmu, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtnd_s64, f64, aarch64_neon_fcvtns, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtnd_u64, f64, aarch64_neon_fcvtnu, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtns_s32, f32, aarch64_neon_fcvtns, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtns_u32, f32, aarch64_neon_fcvtnu, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtpd_s64, f64, aarch64_neon_fcvtps, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtpd_u64, f64, aarch64_neon_fcvtpu, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtps_s32, f32, aarch64_neon_fcvtps, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtps_u32, f32, aarch64_neon_fcvtpu, VectorRet | ScalarArg1),
-  SISDMAP1(vcvts_f32, s32, aarch64_neon_vcvtint2fps, ScalarRet | VectorGetArg0),
-  SISDMAP1(vcvts_f32, u32, aarch64_neon_vcvtint2fpu, ScalarRet | VectorGetArg0),
-  SISDMAP1(vcvts_n_f32, s32, aarch64_neon_vcvtfxs2fp_n, ScalarRet | VectorGetArg0),
-  SISDMAP1(vcvts_n_f32, u32, aarch64_neon_vcvtfxu2fp_n, ScalarRet | VectorGetArg0),
-  SISDMAP1(vcvts_n_s32, f32, aarch64_neon_vcvtfp2fxs_n, VectorRet | ScalarArg0),
-  SISDMAP1(vcvts_n_u32, f32, aarch64_neon_vcvtfp2fxu_n, VectorRet | ScalarArg0),
-  SISDMAP1(vcvts_s32, f32, aarch64_neon_fcvtzs, VectorRet | ScalarArg1),
-  SISDMAP1(vcvts_u32, f32, aarch64_neon_fcvtzu, VectorRet | ScalarArg1),
+  SISDMAP1(vcvtad_s64, f64, aarch64_neon_fcvtas, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtad_u64, f64, aarch64_neon_fcvtau, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtas_s32, f32, aarch64_neon_fcvtas, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtas_u32, f32, aarch64_neon_fcvtau, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtd_f64, s64, aarch64_neon_vcvtint2fps, AddRetType | Vectorize1ArgType),
+  SISDMAP1(vcvtd_f64, u64, aarch64_neon_vcvtint2fpu, AddRetType | Vectorize1ArgType),
+  SISDMAP1(vcvtd_n_f64, s64, aarch64_neon_vcvtfxs2fp_n, AddRetType | Vectorize1ArgType),
+  SISDMAP1(vcvtd_n_f64, u64, aarch64_neon_vcvtfxu2fp_n, AddRetType | Vectorize1ArgType),
+  SISDMAP1(vcvtd_n_s64, f64, aarch64_neon_vcvtfp2fxs_n, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtd_n_u64, f64, aarch64_neon_vcvtfp2fxu_n, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtd_s64, f64, aarch64_neon_fcvtzs, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtd_u64, f64, aarch64_neon_fcvtzu, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtmd_s64, f64, aarch64_neon_fcvtms, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtmd_u64, f64, aarch64_neon_fcvtmu, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtms_s32, f32, aarch64_neon_fcvtms, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtms_u32, f32, aarch64_neon_fcvtmu, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtnd_s64, f64, aarch64_neon_fcvtns, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtnd_u64, f64, aarch64_neon_fcvtnu, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtns_s32, f32, aarch64_neon_fcvtns, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtns_u32, f32, aarch64_neon_fcvtnu, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtpd_s64, f64, aarch64_neon_fcvtps, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtpd_u64, f64, aarch64_neon_fcvtpu, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtps_s32, f32, aarch64_neon_fcvtps, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtps_u32, f32, aarch64_neon_fcvtpu, VectorRet | Add1ArgType),
+  SISDMAP1(vcvts_f32, s32, aarch64_neon_vcvtint2fps, AddRetType | Vectorize1ArgType),
+  SISDMAP1(vcvts_f32, u32, aarch64_neon_vcvtint2fpu, AddRetType | Vectorize1ArgType),
+  SISDMAP1(vcvts_n_f32, s32, aarch64_neon_vcvtfxs2fp_n, AddRetType | Vectorize1ArgType),
+  SISDMAP1(vcvts_n_f32, u32, aarch64_neon_vcvtfxu2fp_n, AddRetType | Vectorize1ArgType),
+  SISDMAP1(vcvts_n_s32, f32, aarch64_neon_vcvtfp2fxs_n, VectorRet | Add1ArgType),
+  SISDMAP1(vcvts_n_u32, f32, aarch64_neon_vcvtfp2fxu_n, VectorRet | Add1ArgType),
+  SISDMAP1(vcvts_s32, f32, aarch64_neon_fcvtzs, VectorRet | Add1ArgType),
+  SISDMAP1(vcvts_u32, f32, aarch64_neon_fcvtzu, VectorRet | Add1ArgType),
   SISDMAP1(vcvtxd_f32, f64, aarch64_neon_fcvtxn, 0),
   SISDMAP0(vdupb_lane, i8),
   SISDMAP0(vdupb_laneq, i8),
@@ -2621,59 +2624,59 @@ static const NeonSISDIntrinsicInfo AArch64SISDIntrinsicInfo[] = {
   SISDMAP0(vgetq_lane, i32),
   SISDMAP0(vgetq_lane, i64),
   SISDMAP0(vgetq_lane, i8),
-  SISDMAP1(vmaxnmv, f32, aarch64_neon_vpfmaxnm, ScalarRet | VectorCastArg0),
+  SISDMAP1(vmaxnmv, f32, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType),
   SISDMAP1(vmaxnmvq, f32, aarch64_neon_vmaxnmv, 0),
-  SISDMAP1(vmaxnmvq, f64, aarch64_neon_vpfmaxnm, ScalarRet | VectorCastArg0),
-  SISDMAP1(vmaxv, f32, aarch64_neon_vpmax, ScalarRet | VectorCastArg0),
-  SISDMAP1(vmaxv, s16, aarch64_neon_smaxv, VectorRet | VectorCastArg1),
-  SISDMAP1(vmaxv, s32, aarch64_neon_smaxv, VectorRet | VectorCastArg1),
-  SISDMAP1(vmaxv, s8, aarch64_neon_smaxv, VectorRet | VectorCastArg1),
-  SISDMAP1(vmaxv, u16, aarch64_neon_umaxv, VectorRet | VectorCastArg1),
-  SISDMAP1(vmaxv, u32, aarch64_neon_umaxv, VectorRet | VectorCastArg1),
-  SISDMAP1(vmaxv, u8, aarch64_neon_umaxv, VectorRet | VectorCastArg1),
+  SISDMAP1(vmaxnmvq, f64, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType),
+  SISDMAP1(vmaxv, f32, aarch64_neon_vpmax, AddRetType | Add1ArgType),
+  SISDMAP1(vmaxv, s16, aarch64_neon_smaxv, VectorRet | Add1ArgType),
+  SISDMAP1(vmaxv, s32, aarch64_neon_smaxv, VectorRet | Add1ArgType),
+  SISDMAP1(vmaxv, s8, aarch64_neon_smaxv, VectorRet | Add1ArgType),
+  SISDMAP1(vmaxv, u16, aarch64_neon_umaxv, VectorRet | Add1ArgType),
+  SISDMAP1(vmaxv, u32, aarch64_neon_umaxv, VectorRet | Add1ArgType),
+  SISDMAP1(vmaxv, u8, aarch64_neon_umaxv, VectorRet | Add1ArgType),
   SISDMAP1(vmaxvq, f32, aarch64_neon_vmaxv, 0),
-  SISDMAP1(vmaxvq, f64, aarch64_neon_vpmax, ScalarRet | VectorCastArg0),
-  SISDMAP1(vmaxvq, s16, aarch64_neon_smaxv, VectorRet | VectorCastArg1),
-  SISDMAP1(vmaxvq, s32, aarch64_neon_smaxv, VectorRet | VectorCastArg1),
-  SISDMAP1(vmaxvq, s8, aarch64_neon_smaxv, VectorRet | VectorCastArg1),
-  SISDMAP1(vmaxvq, u16, aarch64_neon_umaxv, VectorRet | VectorCastArg1),
-  SISDMAP1(vmaxvq, u32, aarch64_neon_umaxv, VectorRet | VectorCastArg1),
-  SISDMAP1(vmaxvq, u8, aarch64_neon_umaxv, VectorRet | VectorCastArg1),
-  SISDMAP1(vminnmv, f32, aarch64_neon_vpfminnm, ScalarRet | VectorCastArg0),
+  SISDMAP1(vmaxvq, f64, aarch64_neon_vpmax, AddRetType | Add1ArgType),
+  SISDMAP1(vmaxvq, s16, aarch64_neon_smaxv, VectorRet | Add1ArgType),
+  SISDMAP1(vmaxvq, s32, aarch64_neon_smaxv, VectorRet | Add1ArgType),
+  SISDMAP1(vmaxvq, s8, aarch64_neon_smaxv, VectorRet | Add1ArgType),
+  SISDMAP1(vmaxvq, u16, aarch64_neon_umaxv, VectorRet | Add1ArgType),
+  SISDMAP1(vmaxvq, u32, aarch64_neon_umaxv, VectorRet | Add1ArgType),
+  SISDMAP1(vmaxvq, u8, aarch64_neon_umaxv, VectorRet | Add1ArgType),
+  SISDMAP1(vminnmv, f32, aarch64_neon_vpfminnm, AddRetType | Add1ArgType),
   SISDMAP1(vminnmvq, f32, aarch64_neon_vminnmv, 0),
-  SISDMAP1(vminnmvq, f64, aarch64_neon_vpfminnm, ScalarRet | VectorCastArg0),
-  SISDMAP1(vminv, f32, aarch64_neon_vpmin, ScalarRet | VectorCastArg0),
-  SISDMAP1(vminv, s16, aarch64_neon_sminv, VectorRet | VectorCastArg1),
-  SISDMAP1(vminv, s32, aarch64_neon_sminv, VectorRet | VectorCastArg1),
-  SISDMAP1(vminv, s8, aarch64_neon_sminv, VectorRet | VectorCastArg1),
-  SISDMAP1(vminv, u16, aarch64_neon_uminv, VectorRet | VectorCastArg1),
-  SISDMAP1(vminv, u32, aarch64_neon_uminv, VectorRet | VectorCastArg1),
-  SISDMAP1(vminv, u8, aarch64_neon_uminv, VectorRet | VectorCastArg1),
+  SISDMAP1(vminnmvq, f64, aarch64_neon_vpfminnm, AddRetType | Add1ArgType),
+  SISDMAP1(vminv, f32, aarch64_neon_vpmin, AddRetType | Add1ArgType),
+  SISDMAP1(vminv, s16, aarch64_neon_sminv, VectorRet | Add1ArgType),
+  SISDMAP1(vminv, s32, aarch64_neon_sminv, VectorRet | Add1ArgType),
+  SISDMAP1(vminv, s8, aarch64_neon_sminv, VectorRet | Add1ArgType),
+  SISDMAP1(vminv, u16, aarch64_neon_uminv, VectorRet | Add1ArgType),
+  SISDMAP1(vminv, u32, aarch64_neon_uminv, VectorRet | Add1ArgType),
+  SISDMAP1(vminv, u8, aarch64_neon_uminv, VectorRet | Add1ArgType),
   SISDMAP1(vminvq, f32, aarch64_neon_vminv, 0),
-  SISDMAP1(vminvq, f64, aarch64_neon_vpmin, ScalarRet | VectorCastArg0),
-  SISDMAP1(vminvq, s16, aarch64_neon_sminv, VectorRet | VectorCastArg1),
-  SISDMAP1(vminvq, s32, aarch64_neon_sminv, VectorRet | VectorCastArg1),
-  SISDMAP1(vminvq, s8, aarch64_neon_sminv, VectorRet | VectorCastArg1),
-  SISDMAP1(vminvq, u16, aarch64_neon_uminv, VectorRet | VectorCastArg1),
-  SISDMAP1(vminvq, u32, aarch64_neon_uminv, VectorRet | VectorCastArg1),
-  SISDMAP1(vminvq, u8, aarch64_neon_uminv, VectorRet | VectorCastArg1),
+  SISDMAP1(vminvq, f64, aarch64_neon_vpmin, AddRetType | Add1ArgType),
+  SISDMAP1(vminvq, s16, aarch64_neon_sminv, VectorRet | Add1ArgType),
+  SISDMAP1(vminvq, s32, aarch64_neon_sminv, VectorRet | Add1ArgType),
+  SISDMAP1(vminvq, s8, aarch64_neon_sminv, VectorRet | Add1ArgType),
+  SISDMAP1(vminvq, u16, aarch64_neon_uminv, VectorRet | Add1ArgType),
+  SISDMAP1(vminvq, u32, aarch64_neon_uminv, VectorRet | Add1ArgType),
+  SISDMAP1(vminvq, u8, aarch64_neon_uminv, VectorRet | Add1ArgType),
   SISDMAP0(vmul_n, f64),
   SISDMAP1(vmull, p64, aarch64_neon_vmull_p64, 0),
   SISDMAP0(vmulxd, f64),
   SISDMAP0(vmulxs, f32),
   SISDMAP1(vnegd, s64, aarch64_neon_vneg, 0),
-  SISDMAP1(vpaddd, f64, aarch64_neon_vpfadd, ScalarRet | VectorCastArg0),
+  SISDMAP1(vpaddd, f64, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
   SISDMAP1(vpaddd, s64, aarch64_neon_vpadd, 0),
   SISDMAP1(vpaddd, u64, aarch64_neon_vpadd, 0),
-  SISDMAP1(vpadds, f32, aarch64_neon_vpfadd, ScalarRet | VectorCastArg0),
-  SISDMAP1(vpmaxnmqd, f64, aarch64_neon_vpfmaxnm, ScalarRet | VectorCastArg0),
-  SISDMAP1(vpmaxnms, f32, aarch64_neon_vpfmaxnm, ScalarRet | VectorCastArg0),
-  SISDMAP1(vpmaxqd, f64, aarch64_neon_vpmax, ScalarRet | VectorCastArg0),
-  SISDMAP1(vpmaxs, f32, aarch64_neon_vpmax, ScalarRet | VectorCastArg0),
-  SISDMAP1(vpminnmqd, f64, aarch64_neon_vpfminnm, ScalarRet | VectorCastArg0),
-  SISDMAP1(vpminnms, f32, aarch64_neon_vpfminnm, ScalarRet | VectorCastArg0),
-  SISDMAP1(vpminqd, f64, aarch64_neon_vpmin, ScalarRet | VectorCastArg0),
-  SISDMAP1(vpmins, f32, aarch64_neon_vpmin, ScalarRet | VectorCastArg0),
+  SISDMAP1(vpadds, f32, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
+  SISDMAP1(vpmaxnmqd, f64, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType),
+  SISDMAP1(vpmaxnms, f32, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType),
+  SISDMAP1(vpmaxqd, f64, aarch64_neon_vpmax, AddRetType | Add1ArgType),
+  SISDMAP1(vpmaxs, f32, aarch64_neon_vpmax, AddRetType | Add1ArgType),
+  SISDMAP1(vpminnmqd, f64, aarch64_neon_vpfminnm, AddRetType | Add1ArgType),
+  SISDMAP1(vpminnms, f32, aarch64_neon_vpfminnm, AddRetType | Add1ArgType),
+  SISDMAP1(vpminqd, f64, aarch64_neon_vpmin, AddRetType | Add1ArgType),
+  SISDMAP1(vpmins, f32, aarch64_neon_vpmin, AddRetType | Add1ArgType),
   SISDMAP1(vqabsb, s8, arm_neon_vqabs, VectorRet),
   SISDMAP1(vqabsd, s64, arm_neon_vqabs, VectorRet),
   SISDMAP1(vqabsh, s16, arm_neon_vqabs, VectorRet),
@@ -2771,20 +2774,20 @@ static const NeonSISDIntrinsicInfo AArch64SISDIntrinsicInfo[] = {
   SISDMAP1(vqsubh, u16, arm_neon_vqsubu, VectorRet),
   SISDMAP1(vqsubs, s32, arm_neon_vqsubs, VectorRet),
   SISDMAP1(vqsubs, u32, arm_neon_vqsubu, VectorRet),
-  SISDMAP1(vrecped, f64, aarch64_neon_vrecpe, ScalarRet),
-  SISDMAP1(vrecpes, f32, aarch64_neon_vrecpe, ScalarRet),
-  SISDMAP1(vrecpsd, f64, aarch64_neon_vrecps, ScalarRet),
-  SISDMAP1(vrecpss, f32, aarch64_neon_vrecps, ScalarRet),
-  SISDMAP1(vrecpxd, f64, aarch64_neon_vrecpx, ScalarRet),
-  SISDMAP1(vrecpxs, f32, aarch64_neon_vrecpx, ScalarRet),
+  SISDMAP1(vrecped, f64, aarch64_neon_vrecpe, AddRetType),
+  SISDMAP1(vrecpes, f32, aarch64_neon_vrecpe, AddRetType),
+  SISDMAP1(vrecpsd, f64, aarch64_neon_vrecps, AddRetType),
+  SISDMAP1(vrecpss, f32, aarch64_neon_vrecps, AddRetType),
+  SISDMAP1(vrecpxd, f64, aarch64_neon_vrecpx, AddRetType),
+  SISDMAP1(vrecpxs, f32, aarch64_neon_vrecpx, AddRetType),
   SISDMAP1(vrshld, s64, aarch64_neon_vrshlds, 0),
   SISDMAP1(vrshld, u64, aarch64_neon_vrshldu, 0),
   SISDMAP1(vrshrd_n, s64, aarch64_neon_vsrshr, VectorRet),
   SISDMAP1(vrshrd_n, u64, aarch64_neon_vurshr, VectorRet),
-  SISDMAP1(vrsqrted, f64, aarch64_neon_vrsqrte, ScalarRet),
-  SISDMAP1(vrsqrtes, f32, aarch64_neon_vrsqrte, ScalarRet),
-  SISDMAP1(vrsqrtsd, f64, aarch64_neon_vrsqrts, ScalarRet),
-  SISDMAP1(vrsqrtss, f32, aarch64_neon_vrsqrts, ScalarRet),
+  SISDMAP1(vrsqrted, f64, aarch64_neon_vrsqrte, AddRetType),
+  SISDMAP1(vrsqrtes, f32, aarch64_neon_vrsqrte, AddRetType),
+  SISDMAP1(vrsqrtsd, f64, aarch64_neon_vrsqrts, AddRetType),
+  SISDMAP1(vrsqrtss, f32, aarch64_neon_vrsqrts, AddRetType),
   SISDMAP1(vrsrad_n, s64, aarch64_neon_vrsrads_n, 0),
   SISDMAP1(vrsrad_n, u64, aarch64_neon_vrsradu_n, 0),
   SISDMAP0(vset_lane, f32),
@@ -2836,6 +2839,36 @@ static const NeonSISDIntrinsicInfo AArch64SISDIntrinsicInfo[] = {
 static bool AArch64SISDIntrinsicInfoProvenSorted = false;
 #endif
+Function *CodeGenFunction::LookupNeonIntrinsic(unsigned IntrinsicID,
+                                               unsigned Modifier,
+                                               llvm::Type *ArgType,
+                                               const CallExpr *E) {
+  // Return type.
+  SmallVector<llvm::Type *, 3> Tys;
+  if (Modifier & AddRetType) {
+    llvm::Type *Ty = ConvertType(E->getCallReturnType());
+    if (Modifier & VectorizeRetType)
+      Ty = llvm::VectorType::get(Ty, 1);
+    Tys.push_back(Ty);
+  }
+  // Arguments.
+  if (Modifier & VectorizeArgTypes)
+    ArgType = llvm::VectorType::get(ArgType, 1);
+  if (Modifier & (Add1ArgType | Add2ArgTypes))
+    Tys.push_back(ArgType);
+  if (Modifier & Add2ArgTypes)
+    Tys.push_back(ArgType);
+  if (Modifier & InventFloatType)
+    Tys.push_back(FloatTy);
+  return CGM.getIntrinsic(IntrinsicID, Tys);
+}
 static Value *
 EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
                              const NeonSISDIntrinsicInfo &SISDInfo,
@@ -3017,63 +3050,9 @@ EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
     return 0;
   // Determine the type(s) of this overloaded AArch64 intrinsic.
-  Function *F = 0;
-  SmallVector<llvm::Type *, 3> Tys;
-  // Return type.
-  if (IntTypes & (ScalarRet | VectorRet)) {
-    llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType());
-    if (IntTypes & ScalarRet) {
-      // Scalar return value.
-      Tys.push_back(Ty);
-    } else if (IntTypes & VectorRet) {
-      // Convert the scalar return type to one-vector element type.
-      Tys.push_back(llvm::VectorType::get(Ty, 1));
-    }
-  }
-  // Arguments.
-  if (IntTypes & (ScalarArg0 | VectorGetArg0 | VectorCastArg0)) {
-    const Expr *Arg = E->getArg(0);
-    llvm::Type *Ty = CGF.ConvertType(Arg->getType());
-    if (IntTypes & ScalarArg0) {
-      // Scalar argument.
-      Tys.push_back(Ty);
-    } else if (IntTypes & VectorGetArg0) {
-      // Convert the scalar argument to one-vector element type.
-      Tys.push_back(llvm::VectorType::get(Ty, 1));
-    } else if (IntTypes & VectorCastArg0) {
-      // Cast the argument to vector type.
-      Tys.push_back(cast<llvm::VectorType>(Ty));
-    }
-  }
-  // The only intrinsics that require a 2nd argument are the compare intrinsics.
-  // However, the builtins don't always have a 2nd argument (e.g.,
-  // floating-point compare to zero), so we inspect the first argument to
-  // determine the type.
-  if (IntTypes & (ScalarArg1 | VectorGetArg1 | VectorCastArg1)) {
-    const Expr *Arg = E->getArg(0);
-    llvm::Type *Ty = CGF.ConvertType(Arg->getType());
-    if (IntTypes & ScalarArg1) {
-      // Scalar argument.
-      Tys.push_back(Ty);
-    } else if (IntTypes & VectorGetArg1) {
-      // Convert the scalar argument to one-vector element type.
-      Tys.push_back(llvm::VectorType::get(Ty, 1));
-    } else if (IntTypes & VectorCastArg1) {
-      // Cast the argument to a vector type.
-      Tys.push_back(cast<llvm::VectorType>(Ty));
-    }
-  } else if (IntTypes & ScalarFpCmpzArg1) {
-    // Floating-point zero argument.
-    Tys.push_back(CGF.FloatTy);
-  }
-  if (IntTypes)
-    F = CGF.CGM.getIntrinsic(Int, Tys);
-  else
-    F = CGF.CGM.getIntrinsic(Int);
+  const Expr *Arg = E->getArg(0);
+  llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
+  Function *F = CGF.LookupNeonIntrinsic(Int, IntTypes, ArgTy, E);
   Value *Result = CGF.EmitNeonCall(F, Ops, s);
   llvm::Type *ResultType = CGF.ConvertType(E->getType());
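The lookup phase itself relies on AArch64SISDIntrinsicInfo being sorted by BuiltinID; the ProvenSorted flag above suggests a one-time sortedness assertion in asserts builds, after which the table can be binary-searched. The sketch below is an illustration of that pattern, not code from this commit: it assumes only the struct layout shown above, and the findSISDInfo helper plus the toy IDs are invented for the example.

    #include <algorithm>
    #include <cassert>
    #include <cstddef>

    struct NeonSISDIntrinsicInfo {
      unsigned BuiltinID;
      unsigned LLVMIntrinsic;
      const char *NameHint;
      unsigned TypeModifier;

      // Lets std::lower_bound compare a table entry against a builtin ID.
      bool operator<(unsigned RHSBuiltinID) const {
        return BuiltinID < RHSBuiltinID;
      }
    };

    // Binary-searches a BuiltinID-sorted table; returns 0 if there is no entry.
    template <std::size_t N>
    const NeonSISDIntrinsicInfo *
    findSISDInfo(const NeonSISDIntrinsicInfo (&Map)[N], unsigned BuiltinID) {
      const NeonSISDIntrinsicInfo *End = Map + N;
      const NeonSISDIntrinsicInfo *It = std::lower_bound(Map, End, BuiltinID);
      return (It != End && It->BuiltinID == BuiltinID) ? It : 0;
    }

    int main() {
      // Toy stand-in for AArch64SISDIntrinsicInfo (IDs and payloads invented).
      static const NeonSISDIntrinsicInfo Map[] = {
          {100, 11, "vabdd", 0}, {200, 22, "vaddd", 0}, {300, 33, "vpaddd", 0}};
      assert(findSISDInfo(Map, 200)->LLVMIntrinsic == 22);
      assert(findSISDInfo(Map, 250) == 0);
    }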


@@ -2182,6 +2182,8 @@ public:
   llvm::Value *EmitCommonNeonBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
                                          SmallVectorImpl<llvm::Value *> &Ops,
                                          llvm::Value *Align = 0);
+  llvm::Function *LookupNeonIntrinsic(unsigned IntrinsicID, unsigned Modifier,
+                                      llvm::Type *ArgTy, const CallExpr *E);
   llvm::Value *EmitNeonCall(llvm::Function *F,
                             SmallVectorImpl<llvm::Value*> &O,
                             const char *name,