forked from OSchip/llvm-project
[AArch64] Add support for NEON scalar signed saturating accumulate of unsigned
value and unsigned saturating accumulate of signed value instructions. llvm-svn: 192800
This commit is contained in:
parent
5078ea2bd9
commit
178b1cefc7
|
@ -191,4 +191,9 @@ def int_aarch64_neon_vchi : Neon_ICmp_Intrinsic;
|
|||
// Scalar Compare Bitwise Test Bits
|
||||
def int_aarch64_neon_vtstd : Neon_ICmp_Intrinsic;
|
||||
|
||||
// Scalar Signed Saturating Accumulate of Unsigned Value
|
||||
def int_aarch64_neon_vuqadd : Neon_2Arg_Intrinsic;
|
||||
|
||||
// Scalar Unsigned Saturating Accumulate of Signed Value
|
||||
def int_aarch64_neon_vsqadd : Neon_2Arg_Intrinsic;
|
||||
}
|
||||
|
|
|
@ -3116,7 +3116,7 @@ def ST1_4V_1D : NeonI_STVList<0, 0b0010, 0b11, VQuad1D_operand, "st1">;
|
|||
|
||||
// End of vector load/store multiple N-element structure(class SIMD lselem)
|
||||
|
||||
// Scalar Arithmetic
|
||||
// Scalar Three Same
|
||||
|
||||
class NeonI_Scalar3Same_D_size<bit u, bits<5> opcode, string asmop>
|
||||
: NeonI_Scalar3Same<u, 0b11, opcode,
|
||||
|
@ -3264,6 +3264,29 @@ multiclass NeonI_Scalar2SameMisc_BHSD_size<bit u, bits<5> opcode, string asmop>{
|
|||
[], NoItinerary>;
|
||||
}
|
||||
|
||||
// Scalar two-register "accumulate" instructions, one def per register size:
// bb (FPR8, size 0b00), hh (FPR16, 0b01), ss (FPR32, 0b10), dd (FPR64, 0b11).
// Each def takes an extra $Src input alongside $Rn, while the assembly string
// only prints "$Rd, $Rn".
multiclass NeonI_Scalar2SameMisc_accum_BHSD_size<bit u, bits<5> opcode,
|
||||
string asmop> {
|
||||
|
||||
// Tie the $Src input operand to the $Rd output operand for every def below.
let Constraints = "$Src = $Rd" in {
|
||||
def bb : NeonI_Scalar2SameMisc<u, 0b00, opcode,
|
||||
(outs FPR8:$Rd), (ins FPR8:$Src, FPR8:$Rn),
|
||||
!strconcat(asmop, " $Rd, $Rn"),
|
||||
[], NoItinerary>;
|
||||
def hh : NeonI_Scalar2SameMisc<u, 0b01, opcode,
|
||||
(outs FPR16:$Rd), (ins FPR16:$Src, FPR16:$Rn),
|
||||
!strconcat(asmop, " $Rd, $Rn"),
|
||||
[], NoItinerary>;
|
||||
def ss : NeonI_Scalar2SameMisc<u, 0b10, opcode,
|
||||
(outs FPR32:$Rd), (ins FPR32:$Src, FPR32:$Rn),
|
||||
!strconcat(asmop, " $Rd, $Rn"),
|
||||
[], NoItinerary>;
|
||||
def dd: NeonI_Scalar2SameMisc<u, 0b11, opcode,
|
||||
(outs FPR64:$Rd), (ins FPR64:$Src, FPR64:$Rn),
|
||||
!strconcat(asmop, " $Rd, $Rn"),
|
||||
[], NoItinerary>;
|
||||
}
|
||||
}
|
||||
|
||||
multiclass Neon_Scalar2SameMisc_cvt_SD_size_patterns<SDPatternOperator Sopnode,
|
||||
SDPatternOperator Dopnode,
|
||||
Instruction INSTS,
|
||||
|
@ -3283,7 +3306,6 @@ multiclass Neon_Scalar2SameMisc_SD_size_patterns<SDPatternOperator opnode,
|
|||
(INSTD FPR64:$Rn)>;
|
||||
}
|
||||
|
||||
// AdvSIMD Scalar Two Registers Miscellaneous
|
||||
class NeonI_Scalar2SameMisc_cmpz_D_size<bit u, bits<5> opcode, string asmop>
|
||||
: NeonI_Scalar2SameMisc<u, 0b11, opcode,
|
||||
(outs FPR64:$Rd), (ins FPR64:$Rn, neon_uimm0:$Imm),
|
||||
|
@ -3311,6 +3333,22 @@ multiclass Neon_Scalar2SameMisc_BHSD_size_patterns<SDPatternOperator opnode,
|
|||
(INSTD FPR64:$Rn)>;
|
||||
}
|
||||
|
||||
// Selection patterns for the scalar accumulate instructions. For each of
// v1i8 / v1i16 / v1i32 / v1i64, a two-operand `opnode` on FPR8 / FPR16 /
// FPR32 / FPR64 registers is mapped to INSTB / INSTH / INSTS / INSTD, with the
// first operand passed as $Src and the second as $Rn.
multiclass Neon_Scalar2SameMisc_accum_BHSD_size_patterns<
|
||||
SDPatternOperator opnode,
|
||||
Instruction INSTB,
|
||||
Instruction INSTH,
|
||||
Instruction INSTS,
|
||||
Instruction INSTD> {
|
||||
def : Pat<(v1i8 (opnode (v1i8 FPR8:$Src), (v1i8 FPR8:$Rn))),
|
||||
(INSTB FPR8:$Src, FPR8:$Rn)>;
|
||||
def : Pat<(v1i16 (opnode (v1i16 FPR16:$Src), (v1i16 FPR16:$Rn))),
|
||||
(INSTH FPR16:$Src, FPR16:$Rn)>;
|
||||
def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i32 FPR32:$Rn))),
|
||||
(INSTS FPR32:$Src, FPR32:$Rn)>;
|
||||
def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn))),
|
||||
(INSTD FPR64:$Src, FPR64:$Rn)>;
|
||||
}
|
||||
|
||||
// Scalar Integer Add
|
||||
let isCommutable = 1 in {
|
||||
def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">;
|
||||
|
@ -3539,6 +3577,18 @@ defm SQNEG : NeonI_Scalar2SameMisc_BHSD_size<0b1, 0b00111, "sqneg">;
|
|||
defm : Neon_Scalar2SameMisc_BHSD_size_patterns<int_arm_neon_vqneg,
|
||||
SQNEGbb, SQNEGhh, SQNEGss, SQNEGdd>;
|
||||
|
||||
// Scalar Signed Saturating Accumulate of Unsigned Value
|
||||
defm SUQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b0, 0b00011, "suqadd">;
|
||||
defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<int_aarch64_neon_vuqadd,
|
||||
SUQADDbb, SUQADDhh,
|
||||
SUQADDss, SUQADDdd>;
|
||||
|
||||
// Scalar Unsigned Saturating Accumulate of Signed Value
|
||||
defm USQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b1, 0b00011, "usqadd">;
|
||||
defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<int_aarch64_neon_vsqadd,
|
||||
USQADDbb, USQADDhh,
|
||||
USQADDss, USQADDdd>;
|
||||
|
||||
// Scalar Reduce Pairwise
|
||||
|
||||
multiclass NeonI_ScalarPair_D_sizes<bit u, bit size, bits<5> opcode,
|
||||
|
|
|
@ -169,3 +169,107 @@ define <1 x i64> @test_sqsub_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
|
|||
;CHECK: sqsub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
|
||||
ret <1 x i64> %tmp1
|
||||
}
|
||||
|
||||
define i8 @test_vuqaddb_s8(i8 %a, i8 %b) {
|
||||
; CHECK: test_vuqaddb_s8
|
||||
; CHECK: suqadd {{b[0-9]+}}, {{b[0-9]+}}
|
||||
entry:
|
||||
%vuqadd.i = insertelement <1 x i8> undef, i8 %a, i32 0
|
||||
%vuqadd1.i = insertelement <1 x i8> undef, i8 %b, i32 0
|
||||
%vuqadd2.i = call <1 x i8> @llvm.aarch64.neon.vuqadd.v1i8(<1 x i8> %vuqadd.i, <1 x i8> %vuqadd1.i)
|
||||
%0 = extractelement <1 x i8> %vuqadd2.i, i32 0
|
||||
ret i8 %0
|
||||
}
|
||||
|
||||
declare <1 x i8> @llvm.aarch64.neon.vsqadd.v1i8(<1 x i8>, <1 x i8>)
|
||||
|
||||
define i16 @test_vuqaddh_s16(i16 %a, i16 %b) {
|
||||
; CHECK: test_vuqaddh_s16
|
||||
; CHECK: suqadd {{h[0-9]+}}, {{h[0-9]+}}
|
||||
entry:
|
||||
%vuqadd.i = insertelement <1 x i16> undef, i16 %a, i32 0
|
||||
%vuqadd1.i = insertelement <1 x i16> undef, i16 %b, i32 0
|
||||
%vuqadd2.i = call <1 x i16> @llvm.aarch64.neon.vuqadd.v1i16(<1 x i16> %vuqadd.i, <1 x i16> %vuqadd1.i)
|
||||
%0 = extractelement <1 x i16> %vuqadd2.i, i32 0
|
||||
ret i16 %0
|
||||
}
|
||||
|
||||
declare <1 x i16> @llvm.aarch64.neon.vsqadd.v1i16(<1 x i16>, <1 x i16>)
|
||||
|
||||
define i32 @test_vuqadds_s32(i32 %a, i32 %b) {
|
||||
; CHECK: test_vuqadds_s32
|
||||
; CHECK: suqadd {{s[0-9]+}}, {{s[0-9]+}}
|
||||
entry:
|
||||
%vuqadd.i = insertelement <1 x i32> undef, i32 %a, i32 0
|
||||
%vuqadd1.i = insertelement <1 x i32> undef, i32 %b, i32 0
|
||||
%vuqadd2.i = call <1 x i32> @llvm.aarch64.neon.vuqadd.v1i32(<1 x i32> %vuqadd.i, <1 x i32> %vuqadd1.i)
|
||||
%0 = extractelement <1 x i32> %vuqadd2.i, i32 0
|
||||
ret i32 %0
|
||||
}
|
||||
|
||||
declare <1 x i32> @llvm.aarch64.neon.vsqadd.v1i32(<1 x i32>, <1 x i32>)
|
||||
|
||||
define i64 @test_vuqaddd_s64(i64 %a, i64 %b) {
|
||||
; CHECK: test_vuqaddd_s64
|
||||
; CHECK: suqadd {{d[0-9]+}}, {{d[0-9]+}}
|
||||
entry:
|
||||
%vuqadd.i = insertelement <1 x i64> undef, i64 %a, i32 0
|
||||
%vuqadd1.i = insertelement <1 x i64> undef, i64 %b, i32 0
|
||||
%vuqadd2.i = call <1 x i64> @llvm.aarch64.neon.vuqadd.v1i64(<1 x i64> %vuqadd.i, <1 x i64> %vuqadd1.i)
|
||||
%0 = extractelement <1 x i64> %vuqadd2.i, i32 0
|
||||
ret i64 %0
|
||||
}
|
||||
|
||||
declare <1 x i64> @llvm.aarch64.neon.vsqadd.v1i64(<1 x i64>, <1 x i64>)
|
||||
|
||||
define i8 @test_vsqaddb_u8(i8 %a, i8 %b) {
|
||||
; CHECK: test_vsqaddb_u8
|
||||
; CHECK: usqadd {{b[0-9]+}}, {{b[0-9]+}}
|
||||
entry:
|
||||
%vsqadd.i = insertelement <1 x i8> undef, i8 %a, i32 0
|
||||
%vsqadd1.i = insertelement <1 x i8> undef, i8 %b, i32 0
|
||||
%vsqadd2.i = call <1 x i8> @llvm.aarch64.neon.vsqadd.v1i8(<1 x i8> %vsqadd.i, <1 x i8> %vsqadd1.i)
|
||||
%0 = extractelement <1 x i8> %vsqadd2.i, i32 0
|
||||
ret i8 %0
|
||||
}
|
||||
|
||||
declare <1 x i8> @llvm.aarch64.neon.vuqadd.v1i8(<1 x i8>, <1 x i8>)
|
||||
|
||||
define i16 @test_vsqaddh_u16(i16 %a, i16 %b) {
|
||||
; CHECK: test_vsqaddh_u16
|
||||
; CHECK: usqadd {{h[0-9]+}}, {{h[0-9]+}}
|
||||
entry:
|
||||
%vsqadd.i = insertelement <1 x i16> undef, i16 %a, i32 0
|
||||
%vsqadd1.i = insertelement <1 x i16> undef, i16 %b, i32 0
|
||||
%vsqadd2.i = call <1 x i16> @llvm.aarch64.neon.vsqadd.v1i16(<1 x i16> %vsqadd.i, <1 x i16> %vsqadd1.i)
|
||||
%0 = extractelement <1 x i16> %vsqadd2.i, i32 0
|
||||
ret i16 %0
|
||||
}
|
||||
|
||||
declare <1 x i16> @llvm.aarch64.neon.vuqadd.v1i16(<1 x i16>, <1 x i16>)
|
||||
|
||||
define i32 @test_vsqadds_u32(i32 %a, i32 %b) {
|
||||
; CHECK: test_vsqadds_u32
|
||||
; CHECK: usqadd {{s[0-9]+}}, {{s[0-9]+}}
|
||||
entry:
|
||||
%vsqadd.i = insertelement <1 x i32> undef, i32 %a, i32 0
|
||||
%vsqadd1.i = insertelement <1 x i32> undef, i32 %b, i32 0
|
||||
%vsqadd2.i = call <1 x i32> @llvm.aarch64.neon.vsqadd.v1i32(<1 x i32> %vsqadd.i, <1 x i32> %vsqadd1.i)
|
||||
%0 = extractelement <1 x i32> %vsqadd2.i, i32 0
|
||||
ret i32 %0
|
||||
}
|
||||
|
||||
declare <1 x i32> @llvm.aarch64.neon.vuqadd.v1i32(<1 x i32>, <1 x i32>)
|
||||
|
||||
define i64 @test_vsqaddd_u64(i64 %a, i64 %b) {
|
||||
; CHECK: test_vsqaddd_u64
|
||||
; CHECK: usqadd {{d[0-9]+}}, {{d[0-9]+}}
|
||||
entry:
|
||||
%vsqadd.i = insertelement <1 x i64> undef, i64 %a, i32 0
|
||||
%vsqadd1.i = insertelement <1 x i64> undef, i64 %b, i32 0
|
||||
%vsqadd2.i = call <1 x i64> @llvm.aarch64.neon.vsqadd.v1i64(<1 x i64> %vsqadd.i, <1 x i64> %vsqadd1.i)
|
||||
%0 = extractelement <1 x i64> %vsqadd2.i, i32 0
|
||||
ret i64 %0
|
||||
}
|
||||
|
||||
declare <1 x i64> @llvm.aarch64.neon.vuqadd.v1i64(<1 x i64>, <1 x i64>)
|
||||
|
|
|
@ -4396,3 +4396,47 @@
|
|||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: cmtst b20, d21, d22
|
||||
// CHECK-ERROR: ^
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Scalar Signed Saturating Accumulate of Unsigned Value
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
suqadd b0, h1
|
||||
suqadd h0, s1
|
||||
suqadd s0, d1
|
||||
suqadd d0, b0
|
||||
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: suqadd b0, h1
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: suqadd h0, s1
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: suqadd s0, d1
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: suqadd d0, b0
|
||||
// CHECK-ERROR: ^
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Scalar Unsigned Saturating Accumulate of Signed Value
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
usqadd b0, h1
|
||||
usqadd h0, s1
|
||||
usqadd s0, d1
|
||||
usqadd d0, b1
|
||||
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: usqadd b0, h1
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: usqadd h0, s1
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: usqadd s0, d1
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: usqadd d0, b1
|
||||
// CHECK-ERROR: ^
|
||||
|
|
|
@ -52,3 +52,30 @@
|
|||
// CHECK: uqsub s20, s21, s2 // encoding: [0xb4,0x2e,0xa2,0x7e]
|
||||
// CHECK: uqsub d17, d31, d8 // encoding: [0xf1,0x2f,0xe8,0x7e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Signed Saturating Accumulate of Unsigned Value
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
suqadd b19, b14
|
||||
suqadd h20, h15
|
||||
suqadd s21, s12
|
||||
suqadd d18, d22
|
||||
|
||||
// CHECK: suqadd b19, b14 // encoding: [0xd3,0x39,0x20,0x5e]
|
||||
// CHECK: suqadd h20, h15 // encoding: [0xf4,0x39,0x60,0x5e]
|
||||
// CHECK: suqadd s21, s12 // encoding: [0x95,0x39,0xa0,0x5e]
|
||||
// CHECK: suqadd d18, d22 // encoding: [0xd2,0x3a,0xe0,0x5e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Unsigned Saturating Accumulate of Signed Value
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
usqadd b19, b14
|
||||
usqadd h20, h15
|
||||
usqadd s21, s12
|
||||
usqadd d18, d22
|
||||
|
||||
// CHECK: usqadd b19, b14 // encoding: [0xd3,0x39,0x20,0x7e]
|
||||
// CHECK: usqadd h20, h15 // encoding: [0xf4,0x39,0x60,0x7e]
|
||||
// CHECK: usqadd s21, s12 // encoding: [0x95,0x39,0xa0,0x7e]
|
||||
// CHECK: usqadd d18, d22 // encoding: [0xd2,0x3a,0xe0,0x7e]
|
||||
|
|
|
@ -1623,3 +1623,27 @@
|
|||
0xf5,0x79,0x60,0x7e
|
||||
0x94,0x79,0xa0,0x7e
|
||||
0x92,0x79,0xe0,0x7e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Signed Saturating Accumulate of Unsigned Value
|
||||
#----------------------------------------------------------------------
|
||||
# CHECK: suqadd b19, b14
|
||||
# CHECK: suqadd h20, h15
|
||||
# CHECK: suqadd s21, s12
|
||||
# CHECK: suqadd d18, d22
|
||||
0xd3,0x39,0x20,0x5e
|
||||
0xf4,0x39,0x60,0x5e
|
||||
0x95,0x39,0xa0,0x5e
|
||||
0xd2,0x3a,0xe0,0x5e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Unsigned Saturating Accumulate of Signed Value
|
||||
#----------------------------------------------------------------------
|
||||
# CHECK: usqadd b19, b14
|
||||
# CHECK: usqadd h20, h15
|
||||
# CHECK: usqadd s21, s12
|
||||
# CHECK: usqadd d18, d22
|
||||
0xd3,0x39,0x20,0x7e
|
||||
0xf4,0x39,0x60,0x7e
|
||||
0x95,0x39,0xa0,0x7e
|
||||
0xd2,0x3a,0xe0,0x7e
|
||||
|
|
Loading…
Reference in New Issue