forked from OSchip/llvm-project
[ARM] MVE vabd
This adds MVE lowering for VABDS/VABDU, using the code ported from AArch64 in D91937. Differential Revision: https://reviews.llvm.org/D91938
This commit is contained in:
parent
2887f14639
commit
0f83d37a14
|
@ -281,6 +281,8 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
|
|||
setOperationAction(ISD::UADDSAT, VT, Legal);
|
||||
setOperationAction(ISD::SSUBSAT, VT, Legal);
|
||||
setOperationAction(ISD::USUBSAT, VT, Legal);
|
||||
setOperationAction(ISD::ABDS, VT, Legal);
|
||||
setOperationAction(ISD::ABDU, VT, Legal);
|
||||
|
||||
// No native support for these.
|
||||
setOperationAction(ISD::UDIV, VT, Expand);
|
||||
|
@ -14616,6 +14618,8 @@ static SDValue FlattenVectorShuffle(ShuffleVectorSDNode *N, SelectionDAG &DAG) {
|
|||
case ARMISD::VQDMULH:
|
||||
case ISD::MULHS:
|
||||
case ISD::MULHU:
|
||||
case ISD::ABDS:
|
||||
case ISD::ABDU:
|
||||
break;
|
||||
default:
|
||||
return SDValue();
|
||||
|
|
|
@ -2131,36 +2131,31 @@ class MVE_VABD_int<string suffix, bit U, bits<2> size,
|
|||
let validForTailPredication = 1;
|
||||
}
|
||||
|
||||
multiclass MVE_VABD_m<MVEVectorVTInfo VTI,
|
||||
Intrinsic unpred_int, Intrinsic pred_int> {
|
||||
multiclass MVE_VABD_m<MVEVectorVTInfo VTI, SDNode Op,
|
||||
Intrinsic unpred_int, Intrinsic PredInt> {
|
||||
def "" : MVE_VABD_int<VTI.Suffix, VTI.Unsigned, VTI.Size>;
|
||||
defvar Inst = !cast<Instruction>(NAME);
|
||||
|
||||
let Predicates = [HasMVEInt] in {
|
||||
defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? (i32 VTI.Unsigned)),
|
||||
!cast<Instruction>(NAME)>;
|
||||
|
||||
// Unpredicated absolute difference
|
||||
def : Pat<(VTI.Vec (unpred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
|
||||
(i32 VTI.Unsigned))),
|
||||
(VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
|
||||
|
||||
// Predicated absolute difference
|
||||
def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
|
||||
(i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
|
||||
(VTI.Vec MQPR:$inactive))),
|
||||
(VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
|
||||
ARMVCCThen, (VTI.Pred VCCR:$mask),
|
||||
(VTI.Vec MQPR:$inactive)))>;
|
||||
}
|
||||
}
|
||||
|
||||
multiclass MVE_VABD<MVEVectorVTInfo VTI>
|
||||
: MVE_VABD_m<VTI, int_arm_mve_vabd, int_arm_mve_abd_predicated>;
|
||||
multiclass MVE_VABD<MVEVectorVTInfo VTI, SDNode Op>
|
||||
: MVE_VABD_m<VTI, Op, int_arm_mve_vabd, int_arm_mve_abd_predicated>;
|
||||
|
||||
defm MVE_VABDs8 : MVE_VABD<MVE_v16s8>;
|
||||
defm MVE_VABDs16 : MVE_VABD<MVE_v8s16>;
|
||||
defm MVE_VABDs32 : MVE_VABD<MVE_v4s32>;
|
||||
defm MVE_VABDu8 : MVE_VABD<MVE_v16u8>;
|
||||
defm MVE_VABDu16 : MVE_VABD<MVE_v8u16>;
|
||||
defm MVE_VABDu32 : MVE_VABD<MVE_v4u32>;
|
||||
defm MVE_VABDs8 : MVE_VABD<MVE_v16s8, abds>;
|
||||
defm MVE_VABDs16 : MVE_VABD<MVE_v8s16, abds>;
|
||||
defm MVE_VABDs32 : MVE_VABD<MVE_v4s32, abds>;
|
||||
defm MVE_VABDu8 : MVE_VABD<MVE_v16u8, abdu>;
|
||||
defm MVE_VABDu16 : MVE_VABD<MVE_v8u16, abdu>;
|
||||
defm MVE_VABDu32 : MVE_VABD<MVE_v4u32, abdu>;
|
||||
|
||||
class MVE_VRHADD_Base<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
|
||||
: MVE_int<"vrhadd", suffix, size, pattern> {
|
||||
|
|
|
@ -4,15 +4,7 @@
|
|||
define arm_aapcs_vfpcc <16 x i8> @vabd_s8(<16 x i8> %src1, <16 x i8> %src2) {
|
||||
; CHECK-LABEL: vabd_s8:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: vmovlt.s8 q2, q1
|
||||
; CHECK-NEXT: vmovlt.s8 q3, q0
|
||||
; CHECK-NEXT: vmovlb.s8 q1, q1
|
||||
; CHECK-NEXT: vmovlb.s8 q0, q0
|
||||
; CHECK-NEXT: vsub.i16 q2, q3, q2
|
||||
; CHECK-NEXT: vsub.i16 q0, q0, q1
|
||||
; CHECK-NEXT: vabs.s16 q2, q2
|
||||
; CHECK-NEXT: vabs.s16 q0, q0
|
||||
; CHECK-NEXT: vmovnt.i16 q0, q2
|
||||
; CHECK-NEXT: vabd.s8 q0, q0, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
%sextsrc1 = sext <16 x i8> %src1 to <16 x i16>
|
||||
%sextsrc2 = sext <16 x i8> %src2 to <16 x i16>
|
||||
|
@ -27,15 +19,7 @@ define arm_aapcs_vfpcc <16 x i8> @vabd_s8(<16 x i8> %src1, <16 x i8> %src2) {
|
|||
define arm_aapcs_vfpcc <8 x i16> @vabd_s16(<8 x i16> %src1, <8 x i16> %src2) {
|
||||
; CHECK-LABEL: vabd_s16:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: vmovlt.s16 q2, q1
|
||||
; CHECK-NEXT: vmovlt.s16 q3, q0
|
||||
; CHECK-NEXT: vmovlb.s16 q1, q1
|
||||
; CHECK-NEXT: vmovlb.s16 q0, q0
|
||||
; CHECK-NEXT: vsub.i32 q2, q3, q2
|
||||
; CHECK-NEXT: vsub.i32 q0, q0, q1
|
||||
; CHECK-NEXT: vabs.s32 q2, q2
|
||||
; CHECK-NEXT: vabs.s32 q0, q0
|
||||
; CHECK-NEXT: vmovnt.i32 q0, q2
|
||||
; CHECK-NEXT: vabd.s16 q0, q0, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
%sextsrc1 = sext <8 x i16> %src1 to <8 x i32>
|
||||
%sextsrc2 = sext <8 x i16> %src2 to <8 x i32>
|
||||
|
@ -50,46 +34,7 @@ define arm_aapcs_vfpcc <8 x i16> @vabd_s16(<8 x i16> %src1, <8 x i16> %src2) {
|
|||
define arm_aapcs_vfpcc <4 x i32> @vabd_s32(<4 x i32> %src1, <4 x i32> %src2) {
|
||||
; CHECK-LABEL: vabd_s32:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vmov.f32 s12, s2
|
||||
; CHECK-NEXT: vmov.f32 s14, s3
|
||||
; CHECK-NEXT: vmov.f32 s16, s6
|
||||
; CHECK-NEXT: vmov r0, s12
|
||||
; CHECK-NEXT: vmov.f32 s18, s7
|
||||
; CHECK-NEXT: vmov r2, s16
|
||||
; CHECK-NEXT: vmov.f32 s2, s1
|
||||
; CHECK-NEXT: vmov.f32 s6, s5
|
||||
; CHECK-NEXT: vmov r3, s4
|
||||
; CHECK-NEXT: asrs r1, r0, #31
|
||||
; CHECK-NEXT: subs r0, r0, r2
|
||||
; CHECK-NEXT: sbc.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: add.w r0, r0, r1, asr #31
|
||||
; CHECK-NEXT: eor.w r0, r0, r1, asr #31
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: asrs r2, r1, #31
|
||||
; CHECK-NEXT: subs r1, r1, r3
|
||||
; CHECK-NEXT: sbc.w r2, r2, r3, asr #31
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: add.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: eor.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: vmov r2, s18
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
|
||||
; CHECK-NEXT: vmov r0, s14
|
||||
; CHECK-NEXT: asrs r1, r0, #31
|
||||
; CHECK-NEXT: subs r0, r0, r2
|
||||
; CHECK-NEXT: sbc.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: add.w r0, r0, r1, asr #31
|
||||
; CHECK-NEXT: eor.w r0, r0, r1, asr #31
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: asrs r2, r1, #31
|
||||
; CHECK-NEXT: subs r1, r1, r3
|
||||
; CHECK-NEXT: sbc.w r2, r2, r3, asr #31
|
||||
; CHECK-NEXT: add.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: eor.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
|
||||
; CHECK-NEXT: vmov q0, q2
|
||||
; CHECK-NEXT: vpop {d8, d9}
|
||||
; CHECK-NEXT: vabd.s32 q0, q0, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
%sextsrc1 = sext <4 x i32> %src1 to <4 x i64>
|
||||
%sextsrc2 = sext <4 x i32> %src2 to <4 x i64>
|
||||
|
@ -104,15 +49,7 @@ define arm_aapcs_vfpcc <4 x i32> @vabd_s32(<4 x i32> %src1, <4 x i32> %src2) {
|
|||
define arm_aapcs_vfpcc <16 x i8> @vabd_u8(<16 x i8> %src1, <16 x i8> %src2) {
|
||||
; CHECK-LABEL: vabd_u8:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: vmovlt.u8 q2, q1
|
||||
; CHECK-NEXT: vmovlt.u8 q3, q0
|
||||
; CHECK-NEXT: vmovlb.u8 q1, q1
|
||||
; CHECK-NEXT: vmovlb.u8 q0, q0
|
||||
; CHECK-NEXT: vsub.i16 q2, q3, q2
|
||||
; CHECK-NEXT: vsub.i16 q0, q0, q1
|
||||
; CHECK-NEXT: vabs.s16 q2, q2
|
||||
; CHECK-NEXT: vabs.s16 q0, q0
|
||||
; CHECK-NEXT: vmovnt.i16 q0, q2
|
||||
; CHECK-NEXT: vabd.u8 q0, q0, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
%zextsrc1 = zext <16 x i8> %src1 to <16 x i16>
|
||||
%zextsrc2 = zext <16 x i8> %src2 to <16 x i16>
|
||||
|
@ -127,15 +64,7 @@ define arm_aapcs_vfpcc <16 x i8> @vabd_u8(<16 x i8> %src1, <16 x i8> %src2) {
|
|||
define arm_aapcs_vfpcc <8 x i16> @vabd_u16(<8 x i16> %src1, <8 x i16> %src2) {
|
||||
; CHECK-LABEL: vabd_u16:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: vmovlt.u16 q2, q1
|
||||
; CHECK-NEXT: vmovlt.u16 q3, q0
|
||||
; CHECK-NEXT: vmovlb.u16 q1, q1
|
||||
; CHECK-NEXT: vmovlb.u16 q0, q0
|
||||
; CHECK-NEXT: vsub.i32 q2, q3, q2
|
||||
; CHECK-NEXT: vsub.i32 q0, q0, q1
|
||||
; CHECK-NEXT: vabs.s32 q2, q2
|
||||
; CHECK-NEXT: vabs.s32 q0, q0
|
||||
; CHECK-NEXT: vmovnt.i32 q0, q2
|
||||
; CHECK-NEXT: vabd.u16 q0, q0, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
%zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
|
||||
%zextsrc2 = zext <8 x i16> %src2 to <8 x i32>
|
||||
|
@ -150,46 +79,7 @@ define arm_aapcs_vfpcc <8 x i16> @vabd_u16(<8 x i16> %src1, <8 x i16> %src2) {
|
|||
define arm_aapcs_vfpcc <4 x i32> @vabd_u32(<4 x i32> %src1, <4 x i32> %src2) {
|
||||
; CHECK-LABEL: vabd_u32:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vmov.f32 s8, s6
|
||||
; CHECK-NEXT: vmov.i64 q4, #0xffffffff
|
||||
; CHECK-NEXT: vmov.f32 s12, s2
|
||||
; CHECK-NEXT: vmov.f32 s10, s7
|
||||
; CHECK-NEXT: vmov.f32 s14, s3
|
||||
; CHECK-NEXT: vand q2, q2, q4
|
||||
; CHECK-NEXT: vand q3, q3, q4
|
||||
; CHECK-NEXT: vmov r0, r1, d4
|
||||
; CHECK-NEXT: vmov r2, r3, d6
|
||||
; CHECK-NEXT: vmov.f32 s6, s5
|
||||
; CHECK-NEXT: vmov.f32 s2, s1
|
||||
; CHECK-NEXT: vand q1, q1, q4
|
||||
; CHECK-NEXT: vand q4, q0, q4
|
||||
; CHECK-NEXT: subs r0, r2, r0
|
||||
; CHECK-NEXT: sbc.w r1, r3, r1
|
||||
; CHECK-NEXT: add.w r0, r0, r1, asr #31
|
||||
; CHECK-NEXT: eor.w r12, r0, r1, asr #31
|
||||
; CHECK-NEXT: vmov r1, r2, d2
|
||||
; CHECK-NEXT: vmov r3, r0, d8
|
||||
; CHECK-NEXT: subs r1, r3, r1
|
||||
; CHECK-NEXT: sbcs r0, r2
|
||||
; CHECK-NEXT: vmov r2, r3, d7
|
||||
; CHECK-NEXT: add.w r1, r1, r0, asr #31
|
||||
; CHECK-NEXT: eor.w r0, r1, r0, asr #31
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, r12
|
||||
; CHECK-NEXT: vmov r0, r1, d5
|
||||
; CHECK-NEXT: subs r0, r2, r0
|
||||
; CHECK-NEXT: sbc.w r1, r3, r1
|
||||
; CHECK-NEXT: add.w r0, r0, r1, asr #31
|
||||
; CHECK-NEXT: eor.w r12, r0, r1, asr #31
|
||||
; CHECK-NEXT: vmov r1, r2, d3
|
||||
; CHECK-NEXT: vmov r3, r0, d9
|
||||
; CHECK-NEXT: subs r1, r3, r1
|
||||
; CHECK-NEXT: sbcs r0, r2
|
||||
; CHECK-NEXT: add.w r1, r1, r0, asr #31
|
||||
; CHECK-NEXT: eor.w r0, r1, r0, asr #31
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r0, r12
|
||||
; CHECK-NEXT: vpop {d8, d9}
|
||||
; CHECK-NEXT: vabd.u32 q0, q0, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
%zextsrc1 = zext <4 x i32> %src1 to <4 x i64>
|
||||
%zextsrc2 = zext <4 x i32> %src2 to <4 x i64>
|
||||
|
@ -209,26 +99,37 @@ define void @vabd_loop_s8(i8* nocapture readonly %x, i8* nocapture readonly %y,
|
|||
; CHECK-NEXT: mov.w lr, #64
|
||||
; CHECK-NEXT: .LBB6_1: @ %vector.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vldrb.s32 q0, [r1, #12]
|
||||
; CHECK-NEXT: vldrb.s32 q1, [r0, #12]
|
||||
; CHECK-NEXT: vsub.i32 q0, q1, q0
|
||||
; CHECK-NEXT: vldrb.s32 q1, [r0, #8]
|
||||
; CHECK-NEXT: vabs.s32 q0, q0
|
||||
; CHECK-NEXT: vstrb.32 q0, [r2, #12]
|
||||
; CHECK-NEXT: vldrb.s32 q0, [r1, #8]
|
||||
; CHECK-NEXT: vsub.i32 q0, q1, q0
|
||||
; CHECK-NEXT: vldrb.s32 q1, [r0, #4]
|
||||
; CHECK-NEXT: vabs.s32 q0, q0
|
||||
; CHECK-NEXT: vstrb.32 q0, [r2, #8]
|
||||
; CHECK-NEXT: vldrb.s32 q0, [r1, #4]
|
||||
; CHECK-NEXT: vsub.i32 q0, q1, q0
|
||||
; CHECK-NEXT: vldrb.s32 q1, [r0], #16
|
||||
; CHECK-NEXT: vabs.s32 q0, q0
|
||||
; CHECK-NEXT: vstrb.32 q0, [r2, #4]
|
||||
; CHECK-NEXT: vldrb.s32 q0, [r1], #16
|
||||
; CHECK-NEXT: vsub.i32 q0, q1, q0
|
||||
; CHECK-NEXT: vabs.s32 q0, q0
|
||||
; CHECK-NEXT: vstrb.32 q0, [r2], #16
|
||||
; CHECK-NEXT: vldrb.u8 q0, [r1], #16
|
||||
; CHECK-NEXT: vldrb.u8 q1, [r0], #16
|
||||
; CHECK-NEXT: vabd.s8 q0, q1, q0
|
||||
; CHECK-NEXT: vmov.u8 r12, q0[14]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[12]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r12
|
||||
; CHECK-NEXT: vmov.u8 r12, q0[15]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[13]
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r3, r12
|
||||
; CHECK-NEXT: vmov.u8 r12, q0[10]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[8]
|
||||
; CHECK-NEXT: vstrb.32 q1, [r2, #12]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r12
|
||||
; CHECK-NEXT: vmov.u8 r12, q0[11]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[9]
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r3, r12
|
||||
; CHECK-NEXT: vmov.u8 r12, q0[6]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[4]
|
||||
; CHECK-NEXT: vstrb.32 q1, [r2, #8]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r12
|
||||
; CHECK-NEXT: vmov.u8 r12, q0[7]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[5]
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r3, r12
|
||||
; CHECK-NEXT: vmov.u8 r12, q0[2]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[0]
|
||||
; CHECK-NEXT: vstrb.32 q1, [r2, #4]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r12
|
||||
; CHECK-NEXT: vmov.u8 r12, q0[3]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[1]
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r3, r12
|
||||
; CHECK-NEXT: vstrb.32 q1, [r2], #16
|
||||
; CHECK-NEXT: le lr, .LBB6_1
|
||||
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
|
@ -269,16 +170,23 @@ define void @vabd_loop_s16(i16* nocapture readonly %x, i16* nocapture readonly %
|
|||
; CHECK-NEXT: mov.w lr, #128
|
||||
; CHECK-NEXT: .LBB7_1: @ %vector.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vldrh.s32 q0, [r1, #8]
|
||||
; CHECK-NEXT: vldrh.s32 q1, [r0, #8]
|
||||
; CHECK-NEXT: vsub.i32 q0, q1, q0
|
||||
; CHECK-NEXT: vldrh.s32 q1, [r0], #16
|
||||
; CHECK-NEXT: vabs.s32 q0, q0
|
||||
; CHECK-NEXT: vstrh.32 q0, [r2, #8]
|
||||
; CHECK-NEXT: vldrh.s32 q0, [r1], #16
|
||||
; CHECK-NEXT: vsub.i32 q0, q1, q0
|
||||
; CHECK-NEXT: vabs.s32 q0, q0
|
||||
; CHECK-NEXT: vstrh.32 q0, [r2], #16
|
||||
; CHECK-NEXT: vldrh.u16 q0, [r1], #16
|
||||
; CHECK-NEXT: vldrh.u16 q1, [r0], #16
|
||||
; CHECK-NEXT: vabd.s16 q0, q1, q0
|
||||
; CHECK-NEXT: vmov.u16 r12, q0[6]
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[4]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r12
|
||||
; CHECK-NEXT: vmov.u16 r12, q0[7]
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[5]
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r3, r12
|
||||
; CHECK-NEXT: vmov.u16 r12, q0[2]
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[0]
|
||||
; CHECK-NEXT: vstrh.32 q1, [r2, #8]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r12
|
||||
; CHECK-NEXT: vmov.u16 r12, q0[3]
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[1]
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r3, r12
|
||||
; CHECK-NEXT: vstrh.32 q1, [r2], #16
|
||||
; CHECK-NEXT: le lr, .LBB7_1
|
||||
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
|
@ -419,26 +327,37 @@ define void @vabd_loop_u8(i8* nocapture readonly %x, i8* nocapture readonly %y,
|
|||
; CHECK-NEXT: mov.w lr, #64
|
||||
; CHECK-NEXT: .LBB9_1: @ %vector.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vldrb.u32 q0, [r1, #12]
|
||||
; CHECK-NEXT: vldrb.u32 q1, [r0, #12]
|
||||
; CHECK-NEXT: vsub.i32 q0, q1, q0
|
||||
; CHECK-NEXT: vldrb.u32 q1, [r0, #8]
|
||||
; CHECK-NEXT: vabs.s32 q0, q0
|
||||
; CHECK-NEXT: vstrb.32 q0, [r2, #12]
|
||||
; CHECK-NEXT: vldrb.u32 q0, [r1, #8]
|
||||
; CHECK-NEXT: vsub.i32 q0, q1, q0
|
||||
; CHECK-NEXT: vldrb.u32 q1, [r0, #4]
|
||||
; CHECK-NEXT: vabs.s32 q0, q0
|
||||
; CHECK-NEXT: vstrb.32 q0, [r2, #8]
|
||||
; CHECK-NEXT: vldrb.u32 q0, [r1, #4]
|
||||
; CHECK-NEXT: vsub.i32 q0, q1, q0
|
||||
; CHECK-NEXT: vldrb.u32 q1, [r0], #16
|
||||
; CHECK-NEXT: vabs.s32 q0, q0
|
||||
; CHECK-NEXT: vstrb.32 q0, [r2, #4]
|
||||
; CHECK-NEXT: vldrb.u32 q0, [r1], #16
|
||||
; CHECK-NEXT: vsub.i32 q0, q1, q0
|
||||
; CHECK-NEXT: vabs.s32 q0, q0
|
||||
; CHECK-NEXT: vstrb.32 q0, [r2], #16
|
||||
; CHECK-NEXT: vldrb.u8 q0, [r1], #16
|
||||
; CHECK-NEXT: vldrb.u8 q1, [r0], #16
|
||||
; CHECK-NEXT: vabd.u8 q0, q1, q0
|
||||
; CHECK-NEXT: vmov.u8 r12, q0[14]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[12]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r12
|
||||
; CHECK-NEXT: vmov.u8 r12, q0[15]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[13]
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r3, r12
|
||||
; CHECK-NEXT: vmov.u8 r12, q0[10]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[8]
|
||||
; CHECK-NEXT: vstrb.32 q1, [r2, #12]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r12
|
||||
; CHECK-NEXT: vmov.u8 r12, q0[11]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[9]
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r3, r12
|
||||
; CHECK-NEXT: vmov.u8 r12, q0[6]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[4]
|
||||
; CHECK-NEXT: vstrb.32 q1, [r2, #8]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r12
|
||||
; CHECK-NEXT: vmov.u8 r12, q0[7]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[5]
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r3, r12
|
||||
; CHECK-NEXT: vmov.u8 r12, q0[2]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[0]
|
||||
; CHECK-NEXT: vstrb.32 q1, [r2, #4]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r12
|
||||
; CHECK-NEXT: vmov.u8 r12, q0[3]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[1]
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r3, r12
|
||||
; CHECK-NEXT: vstrb.32 q1, [r2], #16
|
||||
; CHECK-NEXT: le lr, .LBB9_1
|
||||
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
|
@ -479,16 +398,23 @@ define void @vabd_loop_u16(i16* nocapture readonly %x, i16* nocapture readonly %
|
|||
; CHECK-NEXT: mov.w lr, #128
|
||||
; CHECK-NEXT: .LBB10_1: @ %vector.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vldrh.u32 q0, [r1, #8]
|
||||
; CHECK-NEXT: vldrh.u32 q1, [r0, #8]
|
||||
; CHECK-NEXT: vsub.i32 q0, q1, q0
|
||||
; CHECK-NEXT: vldrh.u32 q1, [r0], #16
|
||||
; CHECK-NEXT: vabs.s32 q0, q0
|
||||
; CHECK-NEXT: vstrh.32 q0, [r2, #8]
|
||||
; CHECK-NEXT: vldrh.u32 q0, [r1], #16
|
||||
; CHECK-NEXT: vsub.i32 q0, q1, q0
|
||||
; CHECK-NEXT: vabs.s32 q0, q0
|
||||
; CHECK-NEXT: vstrh.32 q0, [r2], #16
|
||||
; CHECK-NEXT: vldrh.u16 q0, [r1], #16
|
||||
; CHECK-NEXT: vldrh.u16 q1, [r0], #16
|
||||
; CHECK-NEXT: vabd.u16 q0, q1, q0
|
||||
; CHECK-NEXT: vmov.u16 r12, q0[6]
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[4]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r12
|
||||
; CHECK-NEXT: vmov.u16 r12, q0[7]
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[5]
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r3, r12
|
||||
; CHECK-NEXT: vmov.u16 r12, q0[2]
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[0]
|
||||
; CHECK-NEXT: vstrh.32 q1, [r2, #8]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r12
|
||||
; CHECK-NEXT: vmov.u16 r12, q0[3]
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[1]
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r3, r12
|
||||
; CHECK-NEXT: vstrh.32 q1, [r2], #16
|
||||
; CHECK-NEXT: le lr, .LBB10_1
|
||||
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
|
|
Loading…
Reference in New Issue