Implement the AArch64 NEON AdvSIMD (3V Diff) instruction class, covering the following 26 instructions:

SADDL, UADDL, SADDW, UADDW, SSUBL, USUBL, SSUBW, USUBW, ADDHN, RADDHN, SABAL, UABAL, SUBHN, RSUBHN, SABDL, UABDL, SMLAL, UMLAL, SMLSL, UMLSL, SQDMLAL, SQDMLSL, SMULL, UMULL, SQDMULL, PMULL

llvm-svn: 190289
This commit is contained in:
Jiangning Liu 2013-09-09 02:21:08 +00:00
parent 2878dc8fe7
commit 1bda93a252
4 changed files with 1200 additions and 16 deletions

View File

@ -18,15 +18,22 @@ def OP_NONE : Op;
// Operation-kind records. Each `def` only declares a name of class `Op`;
// instruction definitions pick one by passing it as their last argument
// (for example VADDL_HIGH passes OP_ADDLHi).
// NOTE(review): the `Hi` suffix presumably marks the variant used by the
// *_high intrinsics (high 64 bits of 128-bit operands) -- confirm in the
// emitter that consumes these records.
def OP_UNAVAILABLE : Op;
def OP_ADD : Op;
def OP_ADDL : Op;
def OP_ADDLHi : Op;
def OP_ADDW : Op;
def OP_ADDWHi : Op;
def OP_SUB : Op;
def OP_SUBL : Op;
def OP_SUBLHi : Op;
def OP_SUBW : Op;
def OP_SUBWHi : Op;
def OP_MUL : Op;
def OP_MLA : Op;
def OP_MLAL : Op;
def OP_MULLHi : Op;
def OP_MLALHi : Op;
def OP_MLS : Op;
def OP_MLSL : Op;
def OP_MLSLHi : Op;
def OP_MUL_N : Op;
def OP_MLA_N : Op;
def OP_MLS_N : Op;
@ -66,9 +73,18 @@ def OP_REV64 : Op;
// More operation-kind records of class `Op`. The `*HNHi`, `ABDL*`, `ABAL*`
// and `QD*Hi` names are the ones referenced by the *_HIGH instruction
// definitions (e.g. VADDHN_HIGH uses OP_ADDHNHi, VQDMULL_HIGH uses
// OP_QDMULLHi).
// NOTE(review): what each record expands to is defined by the emitter, not
// here -- confirm there before relying on the names alone.
def OP_REV32 : Op;
def OP_REV16 : Op;
def OP_REINT : Op;
def OP_ADDHNHi : Op;
def OP_RADDHNHi : Op;
def OP_SUBHNHi : Op;
def OP_RSUBHNHi : Op;
def OP_ABDL : Op;
def OP_ABDLHi : Op;
def OP_ABA : Op;
def OP_ABAL : Op;
def OP_ABALHi : Op;
def OP_QDMULLHi : Op;
def OP_QDMLALHi : Op;
def OP_QDMLSLHi : Op;
def OP_DIV : Op;
def OP_LONG_HI : Op;
def OP_NARROW_HI : Op;
@ -133,6 +149,7 @@ class NoTestOpInst<string n, string p, string t, Op o> : Inst<n, p, t, o> {}
// w: double width elements, same num elts
// n: double width elements, half num elts
// h: half width elements, double num elts
// q: half width elements, quad num elts
// e: half width elements, double num elts, unsigned
// m: half width elements, same num elts
// i: constant int
@ -589,6 +606,29 @@ def FCVTZS_N_S64 : SInst<"vcvt_n_s64", "xdi", "Qd">;
def FCVTZS_N_U64 : SInst<"vcvt_n_u64", "udi", "Qd">;
}
////////////////////////////////////////////////////////////////////////////////
// 3VDiff class using high 64-bit in operands
// Each SOpInst takes, in order: the intrinsic base name, a prototype string,
// the element-type list it is instantiated for, and the Op record to use.
// In the prototype strings, 'w' is "double width elements, same num elts",
// 'h' is "half width elements, double num elts" and 'q' is "half width
// elements, quad num elts" (see the modifier legend earlier in this file).
// NOTE(review): 'k' is not in the part of the legend visible here; judging by
// the vaddl_high_s8(int8x16_t, int8x16_t) test it is presumably the 128-bit
// form of the base type -- confirm against the full legend.
def VADDL_HIGH : SOpInst<"vaddl_high", "wkk", "csiUcUsUi", OP_ADDLHi>;
def VADDW_HIGH : SOpInst<"vaddw_high", "wwk", "csiUcUsUi", OP_ADDWHi>;
def VSUBL_HIGH : SOpInst<"vsubl_high", "wkk", "csiUcUsUi", OP_SUBLHi>;
def VSUBW_HIGH : SOpInst<"vsubw_high", "wwk", "csiUcUsUi", OP_SUBWHi>;
def VABDL_HIGH : SOpInst<"vabdl_high", "wkk", "csiUcUsUi", OP_ABDLHi>;
def VABAL_HIGH : SOpInst<"vabal_high", "wwkk", "csiUcUsUi", OP_ABALHi>;
def VMULL_HIGH : SOpInst<"vmull_high", "wkk", "csiUcUsUiPc", OP_MULLHi>;
def VMLAL_HIGH : SOpInst<"vmlal_high", "wwkk", "csiUcUsUi", OP_MLALHi>;
def VMLSL_HIGH : SOpInst<"vmlsl_high", "wwkk", "csiUcUsUi", OP_MLSLHi>;
def VADDHN_HIGH : SOpInst<"vaddhn_high", "qhkk", "silUsUiUl", OP_ADDHNHi>;
def VRADDHN_HIGH : SOpInst<"vraddhn_high", "qhkk", "silUsUiUl", OP_RADDHNHi>;
def VSUBHN_HIGH : SOpInst<"vsubhn_high", "qhkk", "silUsUiUl", OP_SUBHNHi>;
def VRSUBHN_HIGH : SOpInst<"vrsubhn_high", "qhkk", "silUsUiUl", OP_RSUBHNHi>;
def VQDMULL_HIGH : SOpInst<"vqdmull_high", "wkk", "si", OP_QDMULLHi>;
def VQDMLAL_HIGH : SOpInst<"vqdmlal_high", "wwkk", "si", OP_QDMLALHi>;
def VQDMLSL_HIGH : SOpInst<"vqdmlsl_high", "wwkk", "si", OP_QDMLSLHi>;
////////////////////////////////////////////////////////////////////////////////
// Scalar Arithmetic

View File

@ -1840,6 +1840,22 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrshl_v, E);
case AArch64::BI__builtin_neon_vqrshlq_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrshlq_v, E);
case AArch64::BI__builtin_neon_vaddhn_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vaddhn_v, E);
case AArch64::BI__builtin_neon_vraddhn_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vraddhn_v, E);
case AArch64::BI__builtin_neon_vsubhn_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vsubhn_v, E);
case AArch64::BI__builtin_neon_vrsubhn_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrsubhn_v, E);
case AArch64::BI__builtin_neon_vmull_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmull_v, E);
case AArch64::BI__builtin_neon_vqdmull_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmull_v, E);
case AArch64::BI__builtin_neon_vqdmlal_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmlal_v, E);
case AArch64::BI__builtin_neon_vqdmlsl_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmlsl_v, E);
case AArch64::BI__builtin_neon_vmax_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmax_v, E);
case AArch64::BI__builtin_neon_vmaxq_v:

View File

@ -4274,3 +4274,971 @@ uint64x2_t test_vcvtq_n_u64_f64(float64x2_t a) {
return vcvtq_n_u64_f64(a, 50);
// CHECK: fcvtzu {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50
}
// vaddl_<type>: each intrinsic is expected to emit one saddl/uaddl.
// Registers are matched as v[0-9]+ so any vector register number is accepted
// (a bracket expression lists characters, it is not a numeric range).
int16x8_t test_vaddl_s8(int8x8_t a, int8x8_t b) {
  // CHECK: test_vaddl_s8
  return vaddl_s8(a, b);
  // CHECK: saddl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
int32x4_t test_vaddl_s16(int16x4_t a, int16x4_t b) {
  // CHECK: test_vaddl_s16
  return vaddl_s16(a, b);
  // CHECK: saddl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
int64x2_t test_vaddl_s32(int32x2_t a, int32x2_t b) {
  // CHECK: test_vaddl_s32
  return vaddl_s32(a, b);
  // CHECK: saddl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
uint16x8_t test_vaddl_u8(uint8x8_t a, uint8x8_t b) {
  // CHECK: test_vaddl_u8
  return vaddl_u8(a, b);
  // CHECK: uaddl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
uint32x4_t test_vaddl_u16(uint16x4_t a, uint16x4_t b) {
  // CHECK: test_vaddl_u16
  return vaddl_u16(a, b);
  // CHECK: uaddl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
uint64x2_t test_vaddl_u32(uint32x2_t a, uint32x2_t b) {
  // CHECK: test_vaddl_u32
  return vaddl_u32(a, b);
  // CHECK: uaddl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
// vaddl_high_<type>: each intrinsic is expected to emit one saddl2/uaddl2.
// Registers are matched as v[0-9]+ so any vector register number is accepted
// (a bracket expression lists characters, it is not a numeric range).
int16x8_t test_vaddl_high_s8(int8x16_t a, int8x16_t b) {
  // CHECK: test_vaddl_high_s8
  return vaddl_high_s8(a, b);
  // CHECK: saddl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
int32x4_t test_vaddl_high_s16(int16x8_t a, int16x8_t b) {
  // CHECK: test_vaddl_high_s16
  return vaddl_high_s16(a, b);
  // CHECK: saddl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
int64x2_t test_vaddl_high_s32(int32x4_t a, int32x4_t b) {
  // CHECK: test_vaddl_high_s32
  return vaddl_high_s32(a, b);
  // CHECK: saddl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
uint16x8_t test_vaddl_high_u8(uint8x16_t a, uint8x16_t b) {
  // CHECK: test_vaddl_high_u8
  return vaddl_high_u8(a, b);
  // CHECK: uaddl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
uint32x4_t test_vaddl_high_u16(uint16x8_t a, uint16x8_t b) {
  // CHECK: test_vaddl_high_u16
  return vaddl_high_u16(a, b);
  // CHECK: uaddl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
uint64x2_t test_vaddl_high_u32(uint32x4_t a, uint32x4_t b) {
  // CHECK: test_vaddl_high_u32
  return vaddl_high_u32(a, b);
  // CHECK: uaddl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
// vaddw_<type>: each intrinsic is expected to emit one saddw/uaddw.
// Registers are matched as v[0-9]+ so any vector register number is accepted
// (a bracket expression lists characters, it is not a numeric range).
int16x8_t test_vaddw_s8(int16x8_t a, int8x8_t b) {
  // CHECK: test_vaddw_s8
  return vaddw_s8(a, b);
  // CHECK: saddw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
}
int32x4_t test_vaddw_s16(int32x4_t a, int16x4_t b) {
  // CHECK: test_vaddw_s16
  return vaddw_s16(a, b);
  // CHECK: saddw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
}
int64x2_t test_vaddw_s32(int64x2_t a, int32x2_t b) {
  // CHECK: test_vaddw_s32
  return vaddw_s32(a, b);
  // CHECK: saddw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
}
uint16x8_t test_vaddw_u8(uint16x8_t a, uint8x8_t b) {
  // CHECK: test_vaddw_u8
  return vaddw_u8(a, b);
  // CHECK: uaddw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
}
uint32x4_t test_vaddw_u16(uint32x4_t a, uint16x4_t b) {
  // CHECK: test_vaddw_u16
  return vaddw_u16(a, b);
  // CHECK: uaddw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
}
uint64x2_t test_vaddw_u32(uint64x2_t a, uint32x2_t b) {
  // CHECK: test_vaddw_u32
  return vaddw_u32(a, b);
  // CHECK: uaddw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
}
// vaddw_high_<type>: each intrinsic is expected to emit one saddw2/uaddw2.
// Registers are matched as v[0-9]+ so any vector register number is accepted
// (a bracket expression lists characters, it is not a numeric range).
int16x8_t test_vaddw_high_s8(int16x8_t a, int8x16_t b) {
  // CHECK: test_vaddw_high_s8
  return vaddw_high_s8(a, b);
  // CHECK: saddw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
}
int32x4_t test_vaddw_high_s16(int32x4_t a, int16x8_t b) {
  // CHECK: test_vaddw_high_s16
  return vaddw_high_s16(a, b);
  // CHECK: saddw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
}
int64x2_t test_vaddw_high_s32(int64x2_t a, int32x4_t b) {
  // CHECK: test_vaddw_high_s32
  return vaddw_high_s32(a, b);
  // CHECK: saddw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
}
uint16x8_t test_vaddw_high_u8(uint16x8_t a, uint8x16_t b) {
  // CHECK: test_vaddw_high_u8
  return vaddw_high_u8(a, b);
  // CHECK: uaddw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
}
uint32x4_t test_vaddw_high_u16(uint32x4_t a, uint16x8_t b) {
  // CHECK: test_vaddw_high_u16
  return vaddw_high_u16(a, b);
  // CHECK: uaddw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
}
uint64x2_t test_vaddw_high_u32(uint64x2_t a, uint32x4_t b) {
  // CHECK: test_vaddw_high_u32
  return vaddw_high_u32(a, b);
  // CHECK: uaddw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
}
// vsubl_<type>: each intrinsic is expected to emit one ssubl/usubl.
// Registers are matched as v[0-9]+ so any vector register number is accepted
// (a bracket expression lists characters, it is not a numeric range).
int16x8_t test_vsubl_s8(int8x8_t a, int8x8_t b) {
  // CHECK: test_vsubl_s8
  return vsubl_s8(a, b);
  // CHECK: ssubl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
int32x4_t test_vsubl_s16(int16x4_t a, int16x4_t b) {
  // CHECK: test_vsubl_s16
  return vsubl_s16(a, b);
  // CHECK: ssubl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
int64x2_t test_vsubl_s32(int32x2_t a, int32x2_t b) {
  // CHECK: test_vsubl_s32
  return vsubl_s32(a, b);
  // CHECK: ssubl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
uint16x8_t test_vsubl_u8(uint8x8_t a, uint8x8_t b) {
  // CHECK: test_vsubl_u8
  return vsubl_u8(a, b);
  // CHECK: usubl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
uint32x4_t test_vsubl_u16(uint16x4_t a, uint16x4_t b) {
  // CHECK: test_vsubl_u16
  return vsubl_u16(a, b);
  // CHECK: usubl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
uint64x2_t test_vsubl_u32(uint32x2_t a, uint32x2_t b) {
  // CHECK: test_vsubl_u32
  return vsubl_u32(a, b);
  // CHECK: usubl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
// vsubl_high_<type>: each intrinsic is expected to emit one ssubl2/usubl2.
// Registers are matched as v[0-9]+ so any vector register number is accepted
// (a bracket expression lists characters, it is not a numeric range).
int16x8_t test_vsubl_high_s8(int8x16_t a, int8x16_t b) {
  // CHECK: test_vsubl_high_s8
  return vsubl_high_s8(a, b);
  // CHECK: ssubl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
int32x4_t test_vsubl_high_s16(int16x8_t a, int16x8_t b) {
  // CHECK: test_vsubl_high_s16
  return vsubl_high_s16(a, b);
  // CHECK: ssubl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
int64x2_t test_vsubl_high_s32(int32x4_t a, int32x4_t b) {
  // CHECK: test_vsubl_high_s32
  return vsubl_high_s32(a, b);
  // CHECK: ssubl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
uint16x8_t test_vsubl_high_u8(uint8x16_t a, uint8x16_t b) {
  // CHECK: test_vsubl_high_u8
  return vsubl_high_u8(a, b);
  // CHECK: usubl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
uint32x4_t test_vsubl_high_u16(uint16x8_t a, uint16x8_t b) {
  // CHECK: test_vsubl_high_u16
  return vsubl_high_u16(a, b);
  // CHECK: usubl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
uint64x2_t test_vsubl_high_u32(uint32x4_t a, uint32x4_t b) {
  // CHECK: test_vsubl_high_u32
  return vsubl_high_u32(a, b);
  // CHECK: usubl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
// vsubw_<type>: each intrinsic is expected to emit one ssubw/usubw.
// Registers are matched as v[0-9]+ so any vector register number is accepted
// (a bracket expression lists characters, it is not a numeric range).
int16x8_t test_vsubw_s8(int16x8_t a, int8x8_t b) {
  // CHECK: test_vsubw_s8
  return vsubw_s8(a, b);
  // CHECK: ssubw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
}
int32x4_t test_vsubw_s16(int32x4_t a, int16x4_t b) {
  // CHECK: test_vsubw_s16
  return vsubw_s16(a, b);
  // CHECK: ssubw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
}
int64x2_t test_vsubw_s32(int64x2_t a, int32x2_t b) {
  // CHECK: test_vsubw_s32
  return vsubw_s32(a, b);
  // CHECK: ssubw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
}
uint16x8_t test_vsubw_u8(uint16x8_t a, uint8x8_t b) {
  // CHECK: test_vsubw_u8
  return vsubw_u8(a, b);
  // CHECK: usubw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
}
uint32x4_t test_vsubw_u16(uint32x4_t a, uint16x4_t b) {
  // CHECK: test_vsubw_u16
  return vsubw_u16(a, b);
  // CHECK: usubw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
}
uint64x2_t test_vsubw_u32(uint64x2_t a, uint32x2_t b) {
  // CHECK: test_vsubw_u32
  return vsubw_u32(a, b);
  // CHECK: usubw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
}
// vsubw_high_<type>: each intrinsic is expected to emit one ssubw2/usubw2.
// Registers are matched as v[0-9]+ so any vector register number is accepted
// (a bracket expression lists characters, it is not a numeric range).
int16x8_t test_vsubw_high_s8(int16x8_t a, int8x16_t b) {
  // CHECK: test_vsubw_high_s8
  return vsubw_high_s8(a, b);
  // CHECK: ssubw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
}
int32x4_t test_vsubw_high_s16(int32x4_t a, int16x8_t b) {
  // CHECK: test_vsubw_high_s16
  return vsubw_high_s16(a, b);
  // CHECK: ssubw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
}
int64x2_t test_vsubw_high_s32(int64x2_t a, int32x4_t b) {
  // CHECK: test_vsubw_high_s32
  return vsubw_high_s32(a, b);
  // CHECK: ssubw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
}
uint16x8_t test_vsubw_high_u8(uint16x8_t a, uint8x16_t b) {
  // CHECK: test_vsubw_high_u8
  return vsubw_high_u8(a, b);
  // CHECK: usubw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
}
uint32x4_t test_vsubw_high_u16(uint32x4_t a, uint16x8_t b) {
  // CHECK: test_vsubw_high_u16
  return vsubw_high_u16(a, b);
  // CHECK: usubw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
}
uint64x2_t test_vsubw_high_u32(uint64x2_t a, uint32x4_t b) {
  // CHECK: test_vsubw_high_u32
  return vsubw_high_u32(a, b);
  // CHECK: usubw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
}
// vaddhn_<type>: signed and unsigned forms are both expected to emit addhn.
// Registers are matched as v[0-9]+ so any vector register number is accepted
// (a bracket expression lists characters, it is not a numeric range).
int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) {
  // CHECK: test_vaddhn_s16
  return vaddhn_s16(a, b);
  // CHECK: addhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) {
  // CHECK: test_vaddhn_s32
  return vaddhn_s32(a, b);
  // CHECK: addhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) {
  // CHECK: test_vaddhn_s64
  return vaddhn_s64(a, b);
  // CHECK: addhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) {
  // CHECK: test_vaddhn_u16
  return vaddhn_u16(a, b);
  // CHECK: addhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) {
  // CHECK: test_vaddhn_u32
  return vaddhn_u32(a, b);
  // CHECK: addhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
uint32x2_t test_vaddhn_u64(uint64x2_t a, uint64x2_t b) {
  // CHECK: test_vaddhn_u64
  return vaddhn_u64(a, b);
  // CHECK: addhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
// vaddhn_high_<type>: signed and unsigned forms are both expected to emit
// addhn2. Registers are matched as v[0-9]+ so any vector register number is
// accepted (a bracket expression lists characters, it is not a numeric range).
int8x16_t test_vaddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
  // CHECK: test_vaddhn_high_s16
  return vaddhn_high_s16(r, a, b);
  // CHECK: addhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
int16x8_t test_vaddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
  // CHECK: test_vaddhn_high_s32
  return vaddhn_high_s32(r, a, b);
  // CHECK: addhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
int32x4_t test_vaddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
  // CHECK: test_vaddhn_high_s64
  return vaddhn_high_s64(r, a, b);
  // CHECK: addhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
uint8x16_t test_vaddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
  // CHECK: test_vaddhn_high_u16
  return vaddhn_high_u16(r, a, b);
  // CHECK: addhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
uint16x8_t test_vaddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
  // CHECK: test_vaddhn_high_u32
  return vaddhn_high_u32(r, a, b);
  // CHECK: addhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
uint32x4_t test_vaddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
  // CHECK: test_vaddhn_high_u64
  return vaddhn_high_u64(r, a, b);
  // CHECK: addhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
// vraddhn_<type>: signed and unsigned forms are both expected to emit raddhn.
// Registers are matched as v[0-9]+ so any vector register number is accepted
// (a bracket expression lists characters, it is not a numeric range).
int8x8_t test_vraddhn_s16(int16x8_t a, int16x8_t b) {
  // CHECK: test_vraddhn_s16
  return vraddhn_s16(a, b);
  // CHECK: raddhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
int16x4_t test_vraddhn_s32(int32x4_t a, int32x4_t b) {
  // CHECK: test_vraddhn_s32
  return vraddhn_s32(a, b);
  // CHECK: raddhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
int32x2_t test_vraddhn_s64(int64x2_t a, int64x2_t b) {
  // CHECK: test_vraddhn_s64
  return vraddhn_s64(a, b);
  // CHECK: raddhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
uint8x8_t test_vraddhn_u16(uint16x8_t a, uint16x8_t b) {
  // CHECK: test_vraddhn_u16
  return vraddhn_u16(a, b);
  // CHECK: raddhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
uint16x4_t test_vraddhn_u32(uint32x4_t a, uint32x4_t b) {
  // CHECK: test_vraddhn_u32
  return vraddhn_u32(a, b);
  // CHECK: raddhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
uint32x2_t test_vraddhn_u64(uint64x2_t a, uint64x2_t b) {
  // CHECK: test_vraddhn_u64
  return vraddhn_u64(a, b);
  // CHECK: raddhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
// vraddhn_high_<type>: signed and unsigned forms are both expected to emit
// raddhn2. Registers are matched as v[0-9]+ so any vector register number is
// accepted (a bracket expression lists characters, it is not a numeric range).
int8x16_t test_vraddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
  // CHECK: test_vraddhn_high_s16
  return vraddhn_high_s16(r, a, b);
  // CHECK: raddhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
int16x8_t test_vraddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
  // CHECK: test_vraddhn_high_s32
  return vraddhn_high_s32(r, a, b);
  // CHECK: raddhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
int32x4_t test_vraddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
  // CHECK: test_vraddhn_high_s64
  return vraddhn_high_s64(r, a, b);
  // CHECK: raddhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
uint8x16_t test_vraddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
  // CHECK: test_vraddhn_high_u16
  return vraddhn_high_u16(r, a, b);
  // CHECK: raddhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
uint16x8_t test_vraddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
  // CHECK: test_vraddhn_high_u32
  return vraddhn_high_u32(r, a, b);
  // CHECK: raddhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
uint32x4_t test_vraddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
  // CHECK: test_vraddhn_high_u64
  return vraddhn_high_u64(r, a, b);
  // CHECK: raddhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
// vsubhn_<type>: signed and unsigned forms are both expected to emit subhn.
// Registers are matched as v[0-9]+ so any vector register number is accepted
// (a bracket expression lists characters, it is not a numeric range).
int8x8_t test_vsubhn_s16(int16x8_t a, int16x8_t b) {
  // CHECK: test_vsubhn_s16
  return vsubhn_s16(a, b);
  // CHECK: subhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
int16x4_t test_vsubhn_s32(int32x4_t a, int32x4_t b) {
  // CHECK: test_vsubhn_s32
  return vsubhn_s32(a, b);
  // CHECK: subhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
int32x2_t test_vsubhn_s64(int64x2_t a, int64x2_t b) {
  // CHECK: test_vsubhn_s64
  return vsubhn_s64(a, b);
  // CHECK: subhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
uint8x8_t test_vsubhn_u16(uint16x8_t a, uint16x8_t b) {
  // CHECK: test_vsubhn_u16
  return vsubhn_u16(a, b);
  // CHECK: subhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
uint16x4_t test_vsubhn_u32(uint32x4_t a, uint32x4_t b) {
  // CHECK: test_vsubhn_u32
  return vsubhn_u32(a, b);
  // CHECK: subhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
uint32x2_t test_vsubhn_u64(uint64x2_t a, uint64x2_t b) {
  // CHECK: test_vsubhn_u64
  return vsubhn_u64(a, b);
  // CHECK: subhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
// vsubhn_high_<type>: signed and unsigned forms are both expected to emit
// subhn2. Registers are matched as v[0-9]+ so any vector register number is
// accepted (a bracket expression lists characters, it is not a numeric range).
int8x16_t test_vsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
  // CHECK: test_vsubhn_high_s16
  return vsubhn_high_s16(r, a, b);
  // CHECK: subhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
int16x8_t test_vsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
  // CHECK: test_vsubhn_high_s32
  return vsubhn_high_s32(r, a, b);
  // CHECK: subhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
int32x4_t test_vsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
  // CHECK: test_vsubhn_high_s64
  return vsubhn_high_s64(r, a, b);
  // CHECK: subhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
uint8x16_t test_vsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
  // CHECK: test_vsubhn_high_u16
  return vsubhn_high_u16(r, a, b);
  // CHECK: subhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
uint16x8_t test_vsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
  // CHECK: test_vsubhn_high_u32
  return vsubhn_high_u32(r, a, b);
  // CHECK: subhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
uint32x4_t test_vsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
  // CHECK: test_vsubhn_high_u64
  return vsubhn_high_u64(r, a, b);
  // CHECK: subhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
// vrsubhn_<type>: signed and unsigned forms are both expected to emit rsubhn.
// Registers are matched as v[0-9]+ so any vector register number is accepted
// (a bracket expression lists characters, it is not a numeric range).
int8x8_t test_vrsubhn_s16(int16x8_t a, int16x8_t b) {
  // CHECK: test_vrsubhn_s16
  return vrsubhn_s16(a, b);
  // CHECK: rsubhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
int16x4_t test_vrsubhn_s32(int32x4_t a, int32x4_t b) {
  // CHECK: test_vrsubhn_s32
  return vrsubhn_s32(a, b);
  // CHECK: rsubhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
int32x2_t test_vrsubhn_s64(int64x2_t a, int64x2_t b) {
  // CHECK: test_vrsubhn_s64
  return vrsubhn_s64(a, b);
  // CHECK: rsubhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
uint8x8_t test_vrsubhn_u16(uint16x8_t a, uint16x8_t b) {
  // CHECK: test_vrsubhn_u16
  return vrsubhn_u16(a, b);
  // CHECK: rsubhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
uint16x4_t test_vrsubhn_u32(uint32x4_t a, uint32x4_t b) {
  // CHECK: test_vrsubhn_u32
  return vrsubhn_u32(a, b);
  // CHECK: rsubhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
uint32x2_t test_vrsubhn_u64(uint64x2_t a, uint64x2_t b) {
  // CHECK: test_vrsubhn_u64
  return vrsubhn_u64(a, b);
  // CHECK: rsubhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
// vrsubhn_high_<type>: signed and unsigned forms are both expected to emit
// rsubhn2. Registers are matched as v[0-9]+ so any vector register number is
// accepted (a bracket expression lists characters, it is not a numeric range).
int8x16_t test_vrsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
  // CHECK: test_vrsubhn_high_s16
  return vrsubhn_high_s16(r, a, b);
  // CHECK: rsubhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
int16x8_t test_vrsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
  // CHECK: test_vrsubhn_high_s32
  return vrsubhn_high_s32(r, a, b);
  // CHECK: rsubhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
int32x4_t test_vrsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
  // CHECK: test_vrsubhn_high_s64
  return vrsubhn_high_s64(r, a, b);
  // CHECK: rsubhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
uint8x16_t test_vrsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
  // CHECK: test_vrsubhn_high_u16
  return vrsubhn_high_u16(r, a, b);
  // CHECK: rsubhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
uint16x8_t test_vrsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
  // CHECK: test_vrsubhn_high_u32
  return vrsubhn_high_u32(r, a, b);
  // CHECK: rsubhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
uint32x4_t test_vrsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
  // CHECK: test_vrsubhn_high_u64
  return vrsubhn_high_u64(r, a, b);
  // CHECK: rsubhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
// vabdl_<type>: each intrinsic is expected to emit one sabdl/uabdl.
// Registers are matched as v[0-9]+ so any vector register number is accepted
// (a bracket expression lists characters, it is not a numeric range).
int16x8_t test_vabdl_s8(int8x8_t a, int8x8_t b) {
  // CHECK: test_vabdl_s8
  return vabdl_s8(a, b);
  // CHECK: sabdl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
int32x4_t test_vabdl_s16(int16x4_t a, int16x4_t b) {
  // CHECK: test_vabdl_s16
  return vabdl_s16(a, b);
  // CHECK: sabdl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
int64x2_t test_vabdl_s32(int32x2_t a, int32x2_t b) {
  // CHECK: test_vabdl_s32
  return vabdl_s32(a, b);
  // CHECK: sabdl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
uint16x8_t test_vabdl_u8(uint8x8_t a, uint8x8_t b) {
  // CHECK: test_vabdl_u8
  return vabdl_u8(a, b);
  // CHECK: uabdl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
uint32x4_t test_vabdl_u16(uint16x4_t a, uint16x4_t b) {
  // CHECK: test_vabdl_u16
  return vabdl_u16(a, b);
  // CHECK: uabdl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
uint64x2_t test_vabdl_u32(uint32x2_t a, uint32x2_t b) {
  // CHECK: test_vabdl_u32
  return vabdl_u32(a, b);
  // CHECK: uabdl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
// vabal_<type>: each intrinsic is expected to emit one sabal/uabal.
// Registers are matched as v[0-9]+ so any vector register number is accepted
// (a bracket expression lists characters, it is not a numeric range).
int16x8_t test_vabal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  // CHECK: test_vabal_s8
  return vabal_s8(a, b, c);
  // CHECK: sabal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
int32x4_t test_vabal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  // CHECK: test_vabal_s16
  return vabal_s16(a, b, c);
  // CHECK: sabal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
int64x2_t test_vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  // CHECK: test_vabal_s32
  return vabal_s32(a, b, c);
  // CHECK: sabal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
uint16x8_t test_vabal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  // CHECK: test_vabal_u8
  return vabal_u8(a, b, c);
  // CHECK: uabal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
uint32x4_t test_vabal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  // CHECK: test_vabal_u16
  return vabal_u16(a, b, c);
  // CHECK: uabal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
uint64x2_t test_vabal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  // CHECK: test_vabal_u32
  return vabal_u32(a, b, c);
  // CHECK: uabal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
// vabdl_high_<type>: each intrinsic is expected to emit one sabdl2/uabdl2.
// Registers are matched as v[0-9]+ so any vector register number is accepted
// (a bracket expression lists characters, it is not a numeric range).
int16x8_t test_vabdl_high_s8(int8x16_t a, int8x16_t b) {
  // CHECK: test_vabdl_high_s8
  return vabdl_high_s8(a, b);
  // CHECK: sabdl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
int32x4_t test_vabdl_high_s16(int16x8_t a, int16x8_t b) {
  // CHECK: test_vabdl_high_s16
  return vabdl_high_s16(a, b);
  // CHECK: sabdl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
int64x2_t test_vabdl_high_s32(int32x4_t a, int32x4_t b) {
  // CHECK: test_vabdl_high_s32
  return vabdl_high_s32(a, b);
  // CHECK: sabdl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
uint16x8_t test_vabdl_high_u8(uint8x16_t a, uint8x16_t b) {
  // CHECK: test_vabdl_high_u8
  return vabdl_high_u8(a, b);
  // CHECK: uabdl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
uint32x4_t test_vabdl_high_u16(uint16x8_t a, uint16x8_t b) {
  // CHECK: test_vabdl_high_u16
  return vabdl_high_u16(a, b);
  // CHECK: uabdl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
uint64x2_t test_vabdl_high_u32(uint32x4_t a, uint32x4_t b) {
  // CHECK: test_vabdl_high_u32
  return vabdl_high_u32(a, b);
  // CHECK: uabdl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
// vabal_high_<type>: each intrinsic is expected to emit one sabal2/uabal2.
// Registers are matched as v[0-9]+ so any vector register number is accepted
// (a bracket expression lists characters, it is not a numeric range).
int16x8_t test_vabal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
  // CHECK: test_vabal_high_s8
  return vabal_high_s8(a, b, c);
  // CHECK: sabal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
int32x4_t test_vabal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  // CHECK: test_vabal_high_s16
  return vabal_high_s16(a, b, c);
  // CHECK: sabal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
int64x2_t test_vabal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  // CHECK: test_vabal_high_s32
  return vabal_high_s32(a, b, c);
  // CHECK: sabal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
uint16x8_t test_vabal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
  // CHECK: test_vabal_high_u8
  return vabal_high_u8(a, b, c);
  // CHECK: uabal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
uint32x4_t test_vabal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
  // CHECK: test_vabal_high_u16
  return vabal_high_u16(a, b, c);
  // CHECK: uabal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
uint64x2_t test_vabal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
  // CHECK: test_vabal_high_u32
  return vabal_high_u32(a, b, c);
  // CHECK: uabal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
// vmull_<type>: each intrinsic is expected to emit one smull/umull.
// Registers are matched as v[0-9]+ so any vector register number is accepted
// (a bracket expression lists characters, it is not a numeric range).
int16x8_t test_vmull_s8(int8x8_t a, int8x8_t b) {
  // CHECK: test_vmull_s8
  return vmull_s8(a, b);
  // CHECK: smull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
int32x4_t test_vmull_s16(int16x4_t a, int16x4_t b) {
  // CHECK: test_vmull_s16
  return vmull_s16(a, b);
  // CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
int64x2_t test_vmull_s32(int32x2_t a, int32x2_t b) {
  // CHECK: test_vmull_s32
  return vmull_s32(a, b);
  // CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
uint16x8_t test_vmull_u8(uint8x8_t a, uint8x8_t b) {
  // CHECK: test_vmull_u8
  return vmull_u8(a, b);
  // CHECK: umull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
uint32x4_t test_vmull_u16(uint16x4_t a, uint16x4_t b) {
  // CHECK: test_vmull_u16
  return vmull_u16(a, b);
  // CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
uint64x2_t test_vmull_u32(uint32x2_t a, uint32x2_t b) {
  // CHECK: test_vmull_u32
  return vmull_u32(a, b);
  // CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
// vmull_high_<type>: each intrinsic is expected to emit one smull2/umull2.
// Registers are matched as v[0-9]+ so any vector register number is accepted
// (a bracket expression lists characters, it is not a numeric range).
int16x8_t test_vmull_high_s8(int8x16_t a, int8x16_t b) {
  // CHECK: test_vmull_high_s8
  return vmull_high_s8(a, b);
  // CHECK: smull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
int32x4_t test_vmull_high_s16(int16x8_t a, int16x8_t b) {
  // CHECK: test_vmull_high_s16
  return vmull_high_s16(a, b);
  // CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
int64x2_t test_vmull_high_s32(int32x4_t a, int32x4_t b) {
  // CHECK: test_vmull_high_s32
  return vmull_high_s32(a, b);
  // CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
uint16x8_t test_vmull_high_u8(uint8x16_t a, uint8x16_t b) {
  // CHECK: test_vmull_high_u8
  return vmull_high_u8(a, b);
  // CHECK: umull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
uint32x4_t test_vmull_high_u16(uint16x8_t a, uint16x8_t b) {
  // CHECK: test_vmull_high_u16
  return vmull_high_u16(a, b);
  // CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
uint64x2_t test_vmull_high_u32(uint32x4_t a, uint32x4_t b) {
  // CHECK: test_vmull_high_u32
  return vmull_high_u32(a, b);
  // CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
// vmlal_<type>: each intrinsic is expected to emit one smlal/umlal.
// Registers are matched as v[0-9]+ so any vector register number is accepted
// (a bracket expression lists characters, it is not a numeric range).
int16x8_t test_vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  // CHECK: test_vmlal_s8
  return vmlal_s8(a, b, c);
  // CHECK: smlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
int32x4_t test_vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  // CHECK: test_vmlal_s16
  return vmlal_s16(a, b, c);
  // CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
int64x2_t test_vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  // CHECK: test_vmlal_s32
  return vmlal_s32(a, b, c);
  // CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
uint16x8_t test_vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  // CHECK: test_vmlal_u8
  return vmlal_u8(a, b, c);
  // CHECK: umlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
uint32x4_t test_vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  // CHECK: test_vmlal_u16
  return vmlal_u16(a, b, c);
  // CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
uint64x2_t test_vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  // CHECK: test_vmlal_u32
  return vmlal_u32(a, b, c);
  // CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
// vmlal_high_<type>: each intrinsic is expected to emit one smlal2/umlal2.
// Registers are matched as v[0-9]+ so any vector register number is accepted
// (a bracket expression lists characters, it is not a numeric range).
int16x8_t test_vmlal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
  // CHECK: test_vmlal_high_s8
  return vmlal_high_s8(a, b, c);
  // CHECK: smlal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
int32x4_t test_vmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  // CHECK: test_vmlal_high_s16
  return vmlal_high_s16(a, b, c);
  // CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
int64x2_t test_vmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  // CHECK: test_vmlal_high_s32
  return vmlal_high_s32(a, b, c);
  // CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
uint16x8_t test_vmlal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
  // CHECK: test_vmlal_high_u8
  return vmlal_high_u8(a, b, c);
  // CHECK: umlal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
uint32x4_t test_vmlal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
  // CHECK: test_vmlal_high_u16
  return vmlal_high_u16(a, b, c);
  // CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
uint64x2_t test_vmlal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
  // CHECK: test_vmlal_high_u32
  return vmlal_high_u32(a, b, c);
  // CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
int16x8_t test_vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
// CHECK: test_vmlsl_s8
return vmlsl_s8(a, b, c);
// CHECK: smlsl {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
}
int32x4_t test_vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
// CHECK: test_vmlsl_s16
return vmlsl_s16(a, b, c);
// CHECK: smlsl {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
}
int64x2_t test_vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
// CHECK: test_vmlsl_s32
return vmlsl_s32(a, b, c);
// CHECK: smlsl {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
}
// vmlsl_u8 must be emitted as umlsl with .8h, .8b, .8b operands; any
// vector register number is accepted.
uint16x8_t test_vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  // CHECK: test_vmlsl_u8
  return vmlsl_u8(a, b, c);
  // CHECK: umlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
// vmlsl_u16 must be emitted as umlsl with .4s, .4h, .4h operands; any
// vector register number is accepted.
uint32x4_t test_vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  // CHECK: test_vmlsl_u16
  return vmlsl_u16(a, b, c);
  // CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
// vmlsl_u32 must be emitted as umlsl with .2d, .2s, .2s operands; any
// vector register number is accepted.
uint64x2_t test_vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  // CHECK: test_vmlsl_u32
  return vmlsl_u32(a, b, c);
  // CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
// vmlsl_high_s8 must be emitted as smlsl2 with .8h, .16b, .16b operands;
// any vector register number is accepted.
int16x8_t test_vmlsl_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
  // CHECK: test_vmlsl_high_s8
  return vmlsl_high_s8(a, b, c);
  // CHECK: smlsl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
// vmlsl_high_s16 must be emitted as smlsl2 with .4s, .8h, .8h operands;
// any vector register number is accepted.
int32x4_t test_vmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  // CHECK: test_vmlsl_high_s16
  return vmlsl_high_s16(a, b, c);
  // CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
// vmlsl_high_s32 must be emitted as smlsl2 with .2d, .4s, .4s operands;
// any vector register number is accepted.
int64x2_t test_vmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  // CHECK: test_vmlsl_high_s32
  return vmlsl_high_s32(a, b, c);
  // CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
// vmlsl_high_u8 must be emitted as umlsl2 with .8h, .16b, .16b operands;
// any vector register number is accepted.
uint16x8_t test_vmlsl_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
  // CHECK: test_vmlsl_high_u8
  return vmlsl_high_u8(a, b, c);
  // CHECK: umlsl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
// vmlsl_high_u16 must be emitted as umlsl2 with .4s, .8h, .8h operands;
// any vector register number is accepted.
uint32x4_t test_vmlsl_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
  // CHECK: test_vmlsl_high_u16
  return vmlsl_high_u16(a, b, c);
  // CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
// vmlsl_high_u32 must be emitted as umlsl2 with .2d, .4s, .4s operands;
// any vector register number is accepted.
uint64x2_t test_vmlsl_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
  // CHECK: test_vmlsl_high_u32
  return vmlsl_high_u32(a, b, c);
  // CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
// vqdmull_s16 must be emitted as sqdmull with .4s, .4h, .4h operands; any
// vector register number is accepted.
int32x4_t test_vqdmull_s16(int16x4_t a, int16x4_t b) {
  // CHECK: test_vqdmull_s16
  return vqdmull_s16(a, b);
  // CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
// vqdmull_s32 must be emitted as sqdmull with .2d, .2s, .2s operands; any
// vector register number is accepted.
int64x2_t test_vqdmull_s32(int32x2_t a, int32x2_t b) {
  // CHECK: test_vqdmull_s32
  return vqdmull_s32(a, b);
  // CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
// vqdmlal_s16 must be emitted as sqdmlal with .4s, .4h, .4h operands; any
// vector register number is accepted.
int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  // CHECK: test_vqdmlal_s16
  return vqdmlal_s16(a, b, c);
  // CHECK: sqdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
// vqdmlal_s32 must be emitted as sqdmlal with .2d, .2s, .2s operands; any
// vector register number is accepted.
int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  // CHECK: test_vqdmlal_s32
  return vqdmlal_s32(a, b, c);
  // CHECK: sqdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
// vqdmlsl_s16 must be emitted as sqdmlsl with .4s, .4h, .4h operands; any
// vector register number is accepted.
int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  // CHECK: test_vqdmlsl_s16
  return vqdmlsl_s16(a, b, c);
  // CHECK: sqdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
// vqdmlsl_s32 must be emitted as sqdmlsl with .2d, .2s, .2s operands; any
// vector register number is accepted.
int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  // CHECK: test_vqdmlsl_s32
  return vqdmlsl_s32(a, b, c);
  // CHECK: sqdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
// vqdmull_high_s16 must be emitted as sqdmull2 with .4s, .8h, .8h operands;
// any vector register number is accepted.
int32x4_t test_vqdmull_high_s16(int16x8_t a, int16x8_t b) {
  // CHECK: test_vqdmull_high_s16
  return vqdmull_high_s16(a, b);
  // CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
// vqdmull_high_s32 must be emitted as sqdmull2 with .2d, .4s, .4s operands;
// any vector register number is accepted.
int64x2_t test_vqdmull_high_s32(int32x4_t a, int32x4_t b) {
  // CHECK: test_vqdmull_high_s32
  return vqdmull_high_s32(a, b);
  // CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
// vqdmlal_high_s16 must be emitted as sqdmlal2 with .4s, .8h, .8h operands;
// any vector register number is accepted.
int32x4_t test_vqdmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  // CHECK: test_vqdmlal_high_s16
  return vqdmlal_high_s16(a, b, c);
  // CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
// vqdmlal_high_s32 must be emitted as sqdmlal2 with .2d, .4s, .4s operands;
// any vector register number is accepted.
int64x2_t test_vqdmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  // CHECK: test_vqdmlal_high_s32
  return vqdmlal_high_s32(a, b, c);
  // CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
// vqdmlsl_high_s16 must be emitted as sqdmlsl2 with .4s, .8h, .8h operands;
// any vector register number is accepted.
int32x4_t test_vqdmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  // CHECK: test_vqdmlsl_high_s16
  return vqdmlsl_high_s16(a, b, c);
  // CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
// vqdmlsl_high_s32 must be emitted as sqdmlsl2 with .2d, .4s, .4s operands;
// any vector register number is accepted.
int64x2_t test_vqdmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  // CHECK: test_vqdmlsl_high_s32
  return vqdmlsl_high_s32(a, b, c);
  // CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
// vmull_p8 must be emitted as pmull with .8h, .8b, .8b operands; any
// vector register number is accepted.
poly16x8_t test_vmull_p8(poly8x8_t a, poly8x8_t b) {
  // CHECK: test_vmull_p8
  return vmull_p8(a, b);
  // CHECK: pmull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
// vmull_high_p8 must be emitted as pmull2 with .8h, .16b, .16b operands;
// any vector register number is accepted.
poly16x8_t test_vmull_high_p8(poly8x16_t a, poly8x16_t b) {
  // CHECK: test_vmull_high_p8
  return vmull_high_p8(a, b);
  // CHECK: pmull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}

View File

@ -40,15 +40,22 @@ enum OpKind {
OpUnavailable,
OpAdd,
OpAddl,
OpAddlHi,
OpAddw,
OpAddwHi,
OpSub,
OpSubl,
OpSublHi,
OpSubw,
OpSubwHi,
OpMul,
OpMla,
OpMlal,
OpMullHi,
OpMlalHi,
OpMls,
OpMlsl,
OpMlslHi,
OpMulN,
OpMlaN,
OpMlsN,
@ -88,9 +95,18 @@ enum OpKind {
OpRev32,
OpRev64,
OpReinterpret,
OpAddhnHi,
OpRAddhnHi,
OpSubhnHi,
OpRSubhnHi,
OpAbdl,
OpAbdlHi,
OpAba,
OpAbal,
OpAbalHi,
OpQDMullHi,
OpQDMlalHi,
OpQDMlslHi,
OpDiv,
OpLongHi,
OpNarrowHi,
@ -159,15 +175,22 @@ public:
OpMap["OP_UNAVAILABLE"] = OpUnavailable;
OpMap["OP_ADD"] = OpAdd;
OpMap["OP_ADDL"] = OpAddl;
OpMap["OP_ADDLHi"] = OpAddlHi;
OpMap["OP_ADDW"] = OpAddw;
OpMap["OP_ADDWHi"] = OpAddwHi;
OpMap["OP_SUB"] = OpSub;
OpMap["OP_SUBL"] = OpSubl;
OpMap["OP_SUBLHi"] = OpSublHi;
OpMap["OP_SUBW"] = OpSubw;
OpMap["OP_SUBWHi"] = OpSubwHi;
OpMap["OP_MUL"] = OpMul;
OpMap["OP_MLA"] = OpMla;
OpMap["OP_MLAL"] = OpMlal;
OpMap["OP_MULLHi"] = OpMullHi;
OpMap["OP_MLALHi"] = OpMlalHi;
OpMap["OP_MLS"] = OpMls;
OpMap["OP_MLSL"] = OpMlsl;
OpMap["OP_MLSLHi"] = OpMlslHi;
OpMap["OP_MUL_N"] = OpMulN;
OpMap["OP_MLA_N"] = OpMlaN;
OpMap["OP_MLS_N"] = OpMlsN;
@ -207,9 +230,18 @@ public:
OpMap["OP_REV32"] = OpRev32;
OpMap["OP_REV64"] = OpRev64;
OpMap["OP_REINT"] = OpReinterpret;
OpMap["OP_ADDHNHi"] = OpAddhnHi;
OpMap["OP_RADDHNHi"] = OpRAddhnHi;
OpMap["OP_SUBHNHi"] = OpSubhnHi;
OpMap["OP_RSUBHNHi"] = OpRSubhnHi;
OpMap["OP_ABDL"] = OpAbdl;
OpMap["OP_ABDLHi"] = OpAbdlHi;
OpMap["OP_ABA"] = OpAba;
OpMap["OP_ABAL"] = OpAbal;
OpMap["OP_ABALHi"] = OpAbalHi;
OpMap["OP_QDMULLHi"] = OpQDMullHi;
OpMap["OP_QDMLALHi"] = OpQDMlalHi;
OpMap["OP_QDMLSLHi"] = OpQDMlslHi;
OpMap["OP_DIV"] = OpDiv;
OpMap["OP_LONG_HI"] = OpLongHi;
OpMap["OP_NARROW_HI"] = OpNarrowHi;
@ -326,6 +358,29 @@ static char Narrow(const char t) {
}
}
/// Return a copy of \p ty in which each element-type code is replaced by the
/// code this function pairs it with: 's' becomes 'c', 'i' becomes 's' and
/// 'l' becomes 'i'. Every other character is copied through unchanged.
static std::string GetNarrowTypestr(StringRef ty)
{
  std::string narrowed;
  const size_t len = ty.size();
  for (size_t pos = 0; pos != len; ++pos) {
    char code = ty[pos];
    // The branches are mutually exclusive so a code is rewritten only once.
    if (code == 's')
      code = 'c';
    else if (code == 'i')
      code = 's';
    else if (code == 'l')
      code = 'i';
    narrowed += code;
  }
  return narrowed;
}
/// For a particular StringRef, return the base type code, and whether it has
/// the quad-vector, polynomial, or unsigned modifiers set.
static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) {
@ -426,6 +481,10 @@ static char ModType(const char mod, char type, bool &quad, bool &poly,
if (type == 'h')
quad = false;
break;
case 'q':
type = Narrow(type);
quad = true;
break;
case 'e':
type = Narrow(type);
usgn = true;
@ -1286,13 +1345,60 @@ static std::string GenMacroLocals(const std::string &proto, StringRef typestr) {
}
// Use the vmovl builtin to sign-extend or zero-extend a vector.
static std::string Extend(StringRef typestr, const std::string &a) {
std::string s;
s = MangleName("vmovl", typestr, ClassS);
// Build a call to the type-mangled vmovl builtin on operand a. When h is
// set, the "vmovl_high" name is mangled instead of "vmovl".
static std::string Extend(StringRef typestr, const std::string &a, bool h=0) {
  std::string builtin = "vmovl";
  if (h)
    builtin += "_high";
  std::string call = MangleName(builtin, typestr, ClassS);
  call.append("(").append(a).append(")");
  return call;
}
// Build a call to the type-mangled vget_high builtin on operand a, i.e. the
// expression used to obtain the high 64-bit part of a vector.
static std::string GetHigh(const std::string &a, StringRef typestr) {
  std::string call = MangleName("vget_high", typestr, ClassS);
  call.append("(").append(a).append(")");
  return call;
}
// Build the statement "<mangled op>(<high of a>, <high of b>);" where both
// operands are first wrapped by GetHigh.
static std::string Gen2OpWith2High(StringRef typestr,
                                   const std::string &op,
                                   const std::string &a,
                                   const std::string &b) {
  const std::string hiA = GetHigh(a, typestr);
  const std::string hiB = GetHigh(b, typestr);
  std::string call = MangleName(op, typestr, ClassS);
  call += "(";
  call += hiA;
  call += ", ";
  call += hiB;
  call += ");";
  return call;
}
// Build the statement "<mangled op>(a, <high of b>, <high of c>);". The
// first operand is passed through as-is; only the latter two are wrapped by
// GetHigh.
static std::string Gen3OpWith2High(StringRef typestr,
                                   const std::string &op,
                                   const std::string &a,
                                   const std::string &b,
                                   const std::string &c) {
  const std::string hiB = GetHigh(b, typestr);
  const std::string hiC = GetHigh(c, typestr);
  std::string call = MangleName(op, typestr, ClassS);
  call += "(";
  call += a;
  call += ", ";
  call += hiB;
  call += ", ";
  call += hiC;
  call += ");";
  return call;
}
// Build a call to the type-mangled vcombine builtin with a as the first
// argument (low 64-bit) and b as the second argument (high 64-bit).
static std::string GenCombine(std::string typestr,
                              const std::string &a,
                              const std::string &b) {
  std::string call = MangleName("vcombine", typestr, ClassS);
  call.append("(").append(a).append(", ").append(b).append(")");
  return call;
}
static std::string Duplicate(unsigned nElts, StringRef typestr,
const std::string &a) {
std::string s;
@ -1368,18 +1474,30 @@ static std::string GenOpString(const std::string &name, OpKind op,
case OpAddl:
s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";";
break;
case OpAddlHi:
s += Extend(typestr, "__a", 1) + " + " + Extend(typestr, "__b", 1) + ";";
break;
case OpAddw:
s += "__a + " + Extend(typestr, "__b") + ";";
break;
case OpAddwHi:
s += "__a + " + Extend(typestr, "__b", 1) + ";";
break;
case OpSub:
s += "__a - __b;";
break;
case OpSubl:
s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";";
break;
case OpSublHi:
s += Extend(typestr, "__a", 1) + " - " + Extend(typestr, "__b", 1) + ";";
break;
case OpSubw:
s += "__a - " + Extend(typestr, "__b") + ";";
break;
case OpSubwHi:
s += "__a - " + Extend(typestr, "__b", 1) + ";";
break;
case OpMulN:
s += "__a * " + Duplicate(nElts, typestr, "__b") + ";";
break;
@ -1413,6 +1531,12 @@ static std::string GenOpString(const std::string &name, OpKind op,
case OpMlal:
s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
break;
case OpMullHi:
s += Gen2OpWith2High(typestr, "vmull", "__a", "__b");
break;
case OpMlalHi:
s += Gen3OpWith2High(typestr, "vmlal", "__a", "__b", "__c");
break;
case OpMlsN:
s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
break;
@ -1433,6 +1557,9 @@ static std::string GenOpString(const std::string &name, OpKind op,
case OpMlsl:
s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
break;
case OpMlslHi:
s += Gen3OpWith2High(typestr, "vmlsl", "__a", "__b", "__c");
break;
case OpQDMullLane:
s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
SplatLane(nElts, "__b", "__c") + ");";
@ -1560,23 +1687,51 @@ static std::string GenOpString(const std::string &name, OpKind op,
}
break;
}
case OpAbdlHi:
s += Gen2OpWith2High(typestr, "vabdl", "__a", "__b");
break;
case OpAddhnHi: {
std::string addhn = MangleName("vaddhn", typestr, ClassS) + "(__b, __c)";
s += GenCombine(GetNarrowTypestr(typestr), "__a", addhn);
s += ";";
break;
}
case OpRAddhnHi: {
std::string raddhn = MangleName("vraddhn", typestr, ClassS) + "(__b, __c)";
s += GenCombine(GetNarrowTypestr(typestr), "__a", raddhn);
s += ";";
break;
}
case OpSubhnHi: {
std::string subhn = MangleName("vsubhn", typestr, ClassS) + "(__b, __c)";
s += GenCombine(GetNarrowTypestr(typestr), "__a", subhn);
s += ";";
break;
}
case OpRSubhnHi: {
std::string rsubhn = MangleName("vrsubhn", typestr, ClassS) + "(__b, __c)";
s += GenCombine(GetNarrowTypestr(typestr), "__a", rsubhn);
s += ";";
break;
}
case OpAba:
s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);";
break;
case OpAbal: {
s += "__a + ";
std::string abd = MangleName("vabd", typestr, ClassS) + "(__b, __c)";
if (typestr[0] != 'U') {
// vabd results are always unsigned and must be zero-extended.
std::string utype = "U" + typestr.str();
s += "(" + TypeString(proto[0], typestr) + ")";
abd = "(" + TypeString('d', utype) + ")" + abd;
s += Extend(utype, abd) + ";";
} else {
s += Extend(typestr, abd) + ";";
}
case OpAbal:
s += "__a + " + MangleName("vabdl", typestr, ClassS) + "(__b, __c);";
break;
case OpAbalHi:
s += Gen3OpWith2High(typestr, "vabal", "__a", "__b", "__c");
break;
case OpQDMullHi:
s += Gen2OpWith2High(typestr, "vqdmull", "__a", "__b");
break;
case OpQDMlalHi:
s += Gen3OpWith2High(typestr, "vqdmlal", "__a", "__b", "__c");
break;
case OpQDMlslHi:
s += Gen3OpWith2High(typestr, "vqdmlsl", "__a", "__b", "__c");
break;
}
case OpDiv:
s += "__a / __b;";
break;
@ -1993,6 +2148,7 @@ void NeonEmitter::run(raw_ostream &OS) {
emitIntrinsic(OS, Records.getDef("VMOVL"), EmittedMap);
emitIntrinsic(OS, Records.getDef("VMULL"), EmittedMap);
emitIntrinsic(OS, Records.getDef("VABD"), EmittedMap);
emitIntrinsic(OS, Records.getDef("VABDL"), EmittedMap);
// ARM intrinsics must be emitted before AArch64 intrinsics to ensure
// common intrinsics appear only once in the output stream.
@ -2014,6 +2170,10 @@ void NeonEmitter::run(raw_ostream &OS) {
// Emit AArch64-specific intrinsics.
OS << "#ifdef __aarch64__\n";
emitIntrinsic(OS, Records.getDef("VMOVL_HIGH"), EmittedMap);
emitIntrinsic(OS, Records.getDef("VMULL_HIGH"), EmittedMap);
emitIntrinsic(OS, Records.getDef("VABDL_HIGH"), EmittedMap);
for (unsigned i = 0, e = RV.size(); i != e; ++i) {
Record *R = RV[i];