[AArch64] Add support for NEON scalar signed saturating accumulate of unsigned
value and unsigned saturating accumulate of signed value instructions.

llvm-svn: 192800
This commit is contained in:
Chad Rosier 2013-10-16 16:09:02 +00:00
parent 5078ea2bd9
commit 178b1cefc7
6 changed files with 256 additions and 2 deletions

View File

@ -191,4 +191,9 @@ def int_aarch64_neon_vchi : Neon_ICmp_Intrinsic;
// Scalar Compare Bitwise Test Bits
def int_aarch64_neon_vtstd : Neon_ICmp_Intrinsic;
// Scalar Signed Saturating Accumulate of Unsigned Value (selected to SUQADD)
def int_aarch64_neon_vuqadd : Neon_2Arg_Intrinsic;
// Scalar Unsigned Saturating Accumulate of Signed Value (selected to USQADD)
def int_aarch64_neon_vsqadd : Neon_2Arg_Intrinsic;
}

View File

@ -3116,7 +3116,7 @@ def ST1_4V_1D : NeonI_STVList<0, 0b0010, 0b11, VQuad1D_operand, "st1">;
// End of vector load/store multiple N-element structure(class SIMD lselem)
// Scalar Arithmetic
// Scalar Three Same
class NeonI_Scalar3Same_D_size<bit u, bits<5> opcode, string asmop>
: NeonI_Scalar3Same<u, 0b11, opcode,
@ -3264,6 +3264,29 @@ multiclass NeonI_Scalar2SameMisc_BHSD_size<bit u, bits<5> opcode, string asmop>{
[], NoItinerary>;
}
// Scalar two-register "accumulate" instructions, one def per scalar register
// class (FPR8, FPR16, FPR32, FPR64). The destination $Rd is tied to the
// accumulator input $Src, so only $Rd and $Rn appear in the assembly string.
// No selection patterns are attached to the defs themselves.
multiclass NeonI_Scalar2SameMisc_accum_BHSD_size<bit u, bits<5> opcode,
                                                 string asmop> {
  let Constraints = "$Src = $Rd" in {
    // b-register variant.
    def bb : NeonI_Scalar2SameMisc<u, 0b00, opcode,
                                   (outs FPR8:$Rd),
                                   (ins FPR8:$Src, FPR8:$Rn),
                                   !strconcat(asmop, " $Rd, $Rn"),
                                   [], NoItinerary>;
    // h-register variant.
    def hh : NeonI_Scalar2SameMisc<u, 0b01, opcode,
                                   (outs FPR16:$Rd),
                                   (ins FPR16:$Src, FPR16:$Rn),
                                   !strconcat(asmop, " $Rd, $Rn"),
                                   [], NoItinerary>;
    // s-register variant.
    def ss : NeonI_Scalar2SameMisc<u, 0b10, opcode,
                                   (outs FPR32:$Rd),
                                   (ins FPR32:$Src, FPR32:$Rn),
                                   !strconcat(asmop, " $Rd, $Rn"),
                                   [], NoItinerary>;
    // d-register variant.
    def dd : NeonI_Scalar2SameMisc<u, 0b11, opcode,
                                   (outs FPR64:$Rd),
                                   (ins FPR64:$Src, FPR64:$Rn),
                                   !strconcat(asmop, " $Rd, $Rn"),
                                   [], NoItinerary>;
  }
}
multiclass Neon_Scalar2SameMisc_cvt_SD_size_patterns<SDPatternOperator Sopnode,
SDPatternOperator Dopnode,
Instruction INSTS,
@ -3283,7 +3306,6 @@ multiclass Neon_Scalar2SameMisc_SD_size_patterns<SDPatternOperator opnode,
(INSTD FPR64:$Rn)>;
}
// AdvSIMD Scalar Two Registers Miscellaneous
class NeonI_Scalar2SameMisc_cmpz_D_size<bit u, bits<5> opcode, string asmop>
: NeonI_Scalar2SameMisc<u, 0b11, opcode,
(outs FPR64:$Rd), (ins FPR64:$Rn, neon_uimm0:$Imm),
@ -3311,6 +3333,22 @@ multiclass Neon_Scalar2SameMisc_BHSD_size_patterns<SDPatternOperator opnode,
(INSTD FPR64:$Rn)>;
}
// Selection patterns for the scalar accumulate instructions: a two-operand
// node on single-element vectors (v1i8, v1i16, v1i32, v1i64) is mapped to the
// instruction given for that element width, with $Src as the first operand
// and $Rn as the second.
multiclass Neon_Scalar2SameMisc_accum_BHSD_size_patterns<
    SDPatternOperator opnode,
    Instruction INSTB,
    Instruction INSTH,
    Instruction INSTS,
    Instruction INSTD> {
  // v1i8 -> INSTB on FPR8.
  def : Pat<(v1i8 (opnode (v1i8 FPR8:$Src), (v1i8 FPR8:$Rn))),
            (INSTB FPR8:$Src, FPR8:$Rn)>;

  // v1i16 -> INSTH on FPR16.
  def : Pat<(v1i16 (opnode (v1i16 FPR16:$Src), (v1i16 FPR16:$Rn))),
            (INSTH FPR16:$Src, FPR16:$Rn)>;

  // v1i32 -> INSTS on FPR32.
  def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i32 FPR32:$Rn))),
            (INSTS FPR32:$Src, FPR32:$Rn)>;

  // v1i64 -> INSTD on FPR64.
  def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn))),
            (INSTD FPR64:$Src, FPR64:$Rn)>;
}
// Scalar Integer Add
let isCommutable = 1 in {
def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">;
@ -3539,6 +3577,18 @@ defm SQNEG : NeonI_Scalar2SameMisc_BHSD_size<0b1, 0b00111, "sqneg">;
defm : Neon_Scalar2SameMisc_BHSD_size_patterns<int_arm_neon_vqneg,
SQNEGbb, SQNEGhh, SQNEGss, SQNEGdd>;
// Scalar Signed Saturating Accumulate of Unsigned Value (SUQADD).
// The vuqadd intrinsic is selected to the SUQADD variant of matching width.
defm SUQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b0, 0b00011, "suqadd">;
defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<
         int_aarch64_neon_vuqadd,
         SUQADDbb, SUQADDhh, SUQADDss, SUQADDdd>;
// Scalar Unsigned Saturating Accumulate of Signed Value (USQADD).
// The vsqadd intrinsic is selected to the USQADD variant of matching width.
defm USQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b1, 0b00011, "usqadd">;
defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<
         int_aarch64_neon_vsqadd,
         USQADDbb, USQADDhh, USQADDss, USQADDdd>;
// Scalar Reduce Pairwise
multiclass NeonI_ScalarPair_D_sizes<bit u, bit size, bits<5> opcode,

View File

@ -169,3 +169,107 @@ define <1 x i64> @test_sqsub_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
;CHECK: sqsub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
ret <1 x i64> %tmp1
}
; SUQADD, 8-bit scalar: both i8 arguments are wrapped into <1 x i8> vectors,
; passed to the vuqadd intrinsic, and lane 0 of the result is returned.
define i8 @test_vuqaddb_s8(i8 %a, i8 %b) {
; CHECK-LABEL anchors the instruction check below to this function's output.
; CHECK-LABEL: test_vuqaddb_s8:
; CHECK: suqadd {{b[0-9]+}}, {{b[0-9]+}}
entry:
  %vuqadd.i = insertelement <1 x i8> undef, i8 %a, i32 0
  %vuqadd1.i = insertelement <1 x i8> undef, i8 %b, i32 0
  %vuqadd2.i = call <1 x i8> @llvm.aarch64.neon.vuqadd.v1i8(<1 x i8> %vuqadd.i, <1 x i8> %vuqadd1.i)
  %0 = extractelement <1 x i8> %vuqadd2.i, i32 0
  ret i8 %0
}
declare <1 x i8> @llvm.aarch64.neon.vsqadd.v1i8(<1 x i8>, <1 x i8>)
; SUQADD, 16-bit scalar: both i16 arguments are wrapped into <1 x i16> vectors,
; passed to the vuqadd intrinsic, and lane 0 of the result is returned.
define i16 @test_vuqaddh_s16(i16 %a, i16 %b) {
; CHECK-LABEL anchors the instruction check below to this function's output.
; CHECK-LABEL: test_vuqaddh_s16:
; CHECK: suqadd {{h[0-9]+}}, {{h[0-9]+}}
entry:
  %vuqadd.i = insertelement <1 x i16> undef, i16 %a, i32 0
  %vuqadd1.i = insertelement <1 x i16> undef, i16 %b, i32 0
  %vuqadd2.i = call <1 x i16> @llvm.aarch64.neon.vuqadd.v1i16(<1 x i16> %vuqadd.i, <1 x i16> %vuqadd1.i)
  %0 = extractelement <1 x i16> %vuqadd2.i, i32 0
  ret i16 %0
}
declare <1 x i16> @llvm.aarch64.neon.vsqadd.v1i16(<1 x i16>, <1 x i16>)
; SUQADD, 32-bit scalar: both i32 arguments are wrapped into <1 x i32> vectors,
; passed to the vuqadd intrinsic, and lane 0 of the result is returned.
define i32 @test_vuqadds_s32(i32 %a, i32 %b) {
; CHECK-LABEL anchors the instruction check below to this function's output.
; CHECK-LABEL: test_vuqadds_s32:
; CHECK: suqadd {{s[0-9]+}}, {{s[0-9]+}}
entry:
  %vuqadd.i = insertelement <1 x i32> undef, i32 %a, i32 0
  %vuqadd1.i = insertelement <1 x i32> undef, i32 %b, i32 0
  %vuqadd2.i = call <1 x i32> @llvm.aarch64.neon.vuqadd.v1i32(<1 x i32> %vuqadd.i, <1 x i32> %vuqadd1.i)
  %0 = extractelement <1 x i32> %vuqadd2.i, i32 0
  ret i32 %0
}
declare <1 x i32> @llvm.aarch64.neon.vsqadd.v1i32(<1 x i32>, <1 x i32>)
; SUQADD, 64-bit scalar: both i64 arguments are wrapped into <1 x i64> vectors,
; passed to the vuqadd intrinsic, and lane 0 of the result is returned.
define i64 @test_vuqaddd_s64(i64 %a, i64 %b) {
; CHECK-LABEL anchors the instruction check below to this function's output.
; CHECK-LABEL: test_vuqaddd_s64:
; CHECK: suqadd {{d[0-9]+}}, {{d[0-9]+}}
entry:
  %vuqadd.i = insertelement <1 x i64> undef, i64 %a, i32 0
  %vuqadd1.i = insertelement <1 x i64> undef, i64 %b, i32 0
  %vuqadd2.i = call <1 x i64> @llvm.aarch64.neon.vuqadd.v1i64(<1 x i64> %vuqadd.i, <1 x i64> %vuqadd1.i)
  %0 = extractelement <1 x i64> %vuqadd2.i, i32 0
  ret i64 %0
}
declare <1 x i64> @llvm.aarch64.neon.vsqadd.v1i64(<1 x i64>, <1 x i64>)
; USQADD, 8-bit scalar: both i8 arguments are wrapped into <1 x i8> vectors,
; passed to the vsqadd intrinsic, and lane 0 of the result is returned.
define i8 @test_vsqaddb_u8(i8 %a, i8 %b) {
; CHECK-LABEL anchors the instruction check below to this function's output.
; CHECK-LABEL: test_vsqaddb_u8:
; CHECK: usqadd {{b[0-9]+}}, {{b[0-9]+}}
entry:
  %vsqadd.i = insertelement <1 x i8> undef, i8 %a, i32 0
  %vsqadd1.i = insertelement <1 x i8> undef, i8 %b, i32 0
  %vsqadd2.i = call <1 x i8> @llvm.aarch64.neon.vsqadd.v1i8(<1 x i8> %vsqadd.i, <1 x i8> %vsqadd1.i)
  %0 = extractelement <1 x i8> %vsqadd2.i, i32 0
  ret i8 %0
}
declare <1 x i8> @llvm.aarch64.neon.vuqadd.v1i8(<1 x i8>, <1 x i8>)
; USQADD, 16-bit scalar: both i16 arguments are wrapped into <1 x i16> vectors,
; passed to the vsqadd intrinsic, and lane 0 of the result is returned.
define i16 @test_vsqaddh_u16(i16 %a, i16 %b) {
; CHECK-LABEL anchors the instruction check below to this function's output.
; CHECK-LABEL: test_vsqaddh_u16:
; CHECK: usqadd {{h[0-9]+}}, {{h[0-9]+}}
entry:
  %vsqadd.i = insertelement <1 x i16> undef, i16 %a, i32 0
  %vsqadd1.i = insertelement <1 x i16> undef, i16 %b, i32 0
  %vsqadd2.i = call <1 x i16> @llvm.aarch64.neon.vsqadd.v1i16(<1 x i16> %vsqadd.i, <1 x i16> %vsqadd1.i)
  %0 = extractelement <1 x i16> %vsqadd2.i, i32 0
  ret i16 %0
}
declare <1 x i16> @llvm.aarch64.neon.vuqadd.v1i16(<1 x i16>, <1 x i16>)
; USQADD, 32-bit scalar: both i32 arguments are wrapped into <1 x i32> vectors,
; passed to the vsqadd intrinsic, and lane 0 of the result is returned.
define i32 @test_vsqadds_u32(i32 %a, i32 %b) {
; CHECK-LABEL anchors the instruction check below to this function's output.
; CHECK-LABEL: test_vsqadds_u32:
; CHECK: usqadd {{s[0-9]+}}, {{s[0-9]+}}
entry:
  %vsqadd.i = insertelement <1 x i32> undef, i32 %a, i32 0
  %vsqadd1.i = insertelement <1 x i32> undef, i32 %b, i32 0
  %vsqadd2.i = call <1 x i32> @llvm.aarch64.neon.vsqadd.v1i32(<1 x i32> %vsqadd.i, <1 x i32> %vsqadd1.i)
  %0 = extractelement <1 x i32> %vsqadd2.i, i32 0
  ret i32 %0
}
declare <1 x i32> @llvm.aarch64.neon.vuqadd.v1i32(<1 x i32>, <1 x i32>)
; USQADD, 64-bit scalar: both i64 arguments are wrapped into <1 x i64> vectors,
; passed to the vsqadd intrinsic, and lane 0 of the result is returned.
define i64 @test_vsqaddd_u64(i64 %a, i64 %b) {
; CHECK-LABEL anchors the instruction check below to this function's output.
; CHECK-LABEL: test_vsqaddd_u64:
; CHECK: usqadd {{d[0-9]+}}, {{d[0-9]+}}
entry:
  %vsqadd.i = insertelement <1 x i64> undef, i64 %a, i32 0
  %vsqadd1.i = insertelement <1 x i64> undef, i64 %b, i32 0
  %vsqadd2.i = call <1 x i64> @llvm.aarch64.neon.vsqadd.v1i64(<1 x i64> %vsqadd.i, <1 x i64> %vsqadd1.i)
  %0 = extractelement <1 x i64> %vsqadd2.i, i32 0
  ret i64 %0
}
declare <1 x i64> @llvm.aarch64.neon.vuqadd.v1i64(<1 x i64>, <1 x i64>)

View File

@ -4396,3 +4396,47 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: cmtst b20, d21, d22
// CHECK-ERROR: ^
//----------------------------------------------------------------------
// Scalar Signed Saturating Accumulate of Unsigned Value
//----------------------------------------------------------------------
// Each suqadd below pairs two scalar registers of different widths; every
// one of them is expected to be diagnosed as an invalid operand.
suqadd b0, h1
suqadd h0, s1
suqadd s0, d1
suqadd d0, b0
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: suqadd b0, h1
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: suqadd h0, s1
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: suqadd s0, d1
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: suqadd d0, b0
// CHECK-ERROR: ^
//----------------------------------------------------------------------
// Scalar Unsigned Saturating Accumulate of Signed Value
//----------------------------------------------------------------------
// Each usqadd below pairs two scalar registers of different widths; every
// one of them is expected to be diagnosed as an invalid operand.
usqadd b0, h1
usqadd h0, s1
usqadd s0, d1
usqadd d0, b1
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: usqadd b0, h1
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: usqadd h0, s1
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: usqadd s0, d1
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: usqadd d0, b1
// CHECK-ERROR: ^

View File

@ -52,3 +52,30 @@
// CHECK: uqsub s20, s21, s2 // encoding: [0xb4,0x2e,0xa2,0x7e]
// CHECK: uqsub d17, d31, d8 // encoding: [0xf1,0x2f,0xe8,0x7e]
//----------------------------------------------------------------------
// Signed Saturating Accumulate of Unsigned Value
//----------------------------------------------------------------------
// One suqadd per scalar register width (b, h, s, d); the expected printed
// form and 4-byte encoding of each follow the instructions.
suqadd b19, b14
suqadd h20, h15
suqadd s21, s12
suqadd d18, d22
// CHECK: suqadd b19, b14 // encoding: [0xd3,0x39,0x20,0x5e]
// CHECK: suqadd h20, h15 // encoding: [0xf4,0x39,0x60,0x5e]
// CHECK: suqadd s21, s12 // encoding: [0x95,0x39,0xa0,0x5e]
// CHECK: suqadd d18, d22 // encoding: [0xd2,0x3a,0xe0,0x5e]
//----------------------------------------------------------------------
// Unsigned Saturating Accumulate of Signed Value
//----------------------------------------------------------------------
// One usqadd per scalar register width (b, h, s, d); the expected printed
// form and 4-byte encoding of each follow the instructions.
usqadd b19, b14
usqadd h20, h15
usqadd s21, s12
usqadd d18, d22
// CHECK: usqadd b19, b14 // encoding: [0xd3,0x39,0x20,0x7e]
// CHECK: usqadd h20, h15 // encoding: [0xf4,0x39,0x60,0x7e]
// CHECK: usqadd s21, s12 // encoding: [0x95,0x39,0xa0,0x7e]
// CHECK: usqadd d18, d22 // encoding: [0xd2,0x3a,0xe0,0x7e]

View File

@ -1623,3 +1623,27 @@
0xf5,0x79,0x60,0x7e
0x94,0x79,0xa0,0x7e
0x92,0x79,0xe0,0x7e
#----------------------------------------------------------------------
# Signed Saturating Accumulate of Unsigned Value
#----------------------------------------------------------------------
# The four byte sequences below are expected to disassemble, in order, to
# the suqadd instructions listed here.
# CHECK: suqadd b19, b14
# CHECK: suqadd h20, h15
# CHECK: suqadd s21, s12
# CHECK: suqadd d18, d22
0xd3,0x39,0x20,0x5e
0xf4,0x39,0x60,0x5e
0x95,0x39,0xa0,0x5e
0xd2,0x3a,0xe0,0x5e
#----------------------------------------------------------------------
# Unsigned Saturating Accumulate of Signed Value
#----------------------------------------------------------------------
# The four byte sequences below are expected to disassemble, in order, to
# the usqadd instructions listed here.
# CHECK: usqadd b19, b14
# CHECK: usqadd h20, h15
# CHECK: usqadd s21, s12
# CHECK: usqadd d18, d22
0xd3,0x39,0x20,0x7e
0xf4,0x39,0x60,0x7e
0x95,0x39,0xa0,0x7e
0xd2,0x3a,0xe0,0x7e