[ARM][MVE] Add patterns for VRHADD
Add patterns which use standard add nodes along with ARM vshr-by-immediate nodes.

Differential Revision: https://reviews.llvm.org/D77069
commit 62f97123fb (parent 3ce0ad1b33)
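For illustration, this is the shape of IR the new patterns are intended to match, mirroring the tests added below (the function name here is just an example, not part of the patch): when both adds carry the no-signed-wrap flag, the whole sequence is selected to a single VRHADD.

; Illustrative sketch only: signed rounding halving add, expected to
; select to a single vrhadd.s32 q0, q0, q1.
define arm_aapcs_vfpcc <4 x i32> @example_vrhadds_v4i32(<4 x i32> %x, <4 x i32> %y) {
  %add = add nsw <4 x i32> %x, %y
  %round = add nsw <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
  %half = ashr <4 x i32> %round, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %half
}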
@@ -2015,6 +2015,26 @@ class MVE_VRHADD_Base<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
  let validForTailPredication = 1;
}

def addnuw : PatFrag<(ops node:$lhs, node:$rhs),
                     (add node:$lhs, node:$rhs), [{
  return N->getFlags().hasNoUnsignedWrap();
}]>;

def addnsw : PatFrag<(ops node:$lhs, node:$rhs),
                     (add node:$lhs, node:$rhs), [{
  return N->getFlags().hasNoSignedWrap();
}]>;

def subnuw : PatFrag<(ops node:$lhs, node:$rhs),
                     (sub node:$lhs, node:$rhs), [{
  return N->getFlags().hasNoUnsignedWrap();
}]>;

def subnsw : PatFrag<(ops node:$lhs, node:$rhs),
                     (sub node:$lhs, node:$rhs), [{
  return N->getFlags().hasNoSignedWrap();
}]>;

multiclass MVE_VRHADD_m<MVEVectorVTInfo VTI,
                        SDNode unpred_op, Intrinsic pred_int> {
  def "" : MVE_VRHADD_Base<VTI.Suffix, VTI.Unsigned, VTI.Size>;
@@ -2046,6 +2066,37 @@ defm MVE_VRHADDu8 : MVE_VRHADD<MVE_v16u8>;
defm MVE_VRHADDu16 : MVE_VRHADD<MVE_v8u16>;
defm MVE_VRHADDu32 : MVE_VRHADD<MVE_v4u32>;

// Rounding Halving Add performs the arithmetic operation with an extra bit of
// precision, before performing the shift, to avoid clipping errors. We're not
// modelling that here with these patterns, but we're using no wrap forms of
// add to ensure that the extra bit of information is not needed for the
// arithmetic or the rounding.
def : Pat<(v16i8 (ARMvshrsImm (addnsw (addnsw (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)),
                                      (v16i8 (ARMvmovImm (i32 3585)))),
                              (i32 1))),
          (MVE_VRHADDs8 MQPR:$Qm, MQPR:$Qn)>;
def : Pat<(v8i16 (ARMvshrsImm (addnsw (addnsw (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)),
                                      (v8i16 (ARMvmovImm (i32 2049)))),
                              (i32 1))),
          (MVE_VRHADDs16 MQPR:$Qm, MQPR:$Qn)>;
def : Pat<(v4i32 (ARMvshrsImm (addnsw (addnsw (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)),
                                      (v4i32 (ARMvmovImm (i32 1)))),
                              (i32 1))),
          (MVE_VRHADDs32 MQPR:$Qm, MQPR:$Qn)>;
def : Pat<(v16i8 (ARMvshruImm (addnuw (addnuw (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)),
                                      (v16i8 (ARMvmovImm (i32 3585)))),
                              (i32 1))),
          (MVE_VRHADDu8 MQPR:$Qm, MQPR:$Qn)>;
def : Pat<(v8i16 (ARMvshruImm (addnuw (addnuw (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)),
                                      (v8i16 (ARMvmovImm (i32 2049)))),
                              (i32 1))),
          (MVE_VRHADDu16 MQPR:$Qm, MQPR:$Qn)>;
def : Pat<(v4i32 (ARMvshruImm (addnuw (addnuw (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)),
                                      (v4i32 (ARMvmovImm (i32 1)))),
                              (i32 1))),
          (MVE_VRHADDu32 MQPR:$Qm, MQPR:$Qn)>;

class MVE_VHADDSUB<string iname, string suffix, bit U, bit subtract,
                   bits<2> size, list<dag> pattern=[]>
  : MVE_int<iname, suffix, size, pattern> {
@@ -2095,26 +2146,6 @@ multiclass MVE_VHADD<MVEVectorVTInfo VTI, PatFrag add_op, SDNode shift_op>
  : MVE_VHADD_m<VTI, int_arm_mve_vhadd, int_arm_mve_hadd_predicated, add_op,
                shift_op>;

def addnuw : PatFrag<(ops node:$lhs, node:$rhs),
                     (add node:$lhs, node:$rhs), [{
  return N->getFlags().hasNoUnsignedWrap();
}]>;

def addnsw : PatFrag<(ops node:$lhs, node:$rhs),
                     (add node:$lhs, node:$rhs), [{
  return N->getFlags().hasNoSignedWrap();
}]>;

def subnuw : PatFrag<(ops node:$lhs, node:$rhs),
                     (sub node:$lhs, node:$rhs), [{
  return N->getFlags().hasNoUnsignedWrap();
}]>;

def subnsw : PatFrag<(ops node:$lhs, node:$rhs),
                     (sub node:$lhs, node:$rhs), [{
  return N->getFlags().hasNoSignedWrap();
}]>;

// Halving add/sub perform the arithmetic operation with an extra bit of
// precision, before performing the shift, to avoid clipping errors. We're not
// modelling that here with these patterns, but we're using no wrap forms of
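Note that a pattern only fires when both adds carry the matching no-wrap flag (addnsw/addnuw on the operand add and on the rounding add). The _nwop and _nwrnd tests below check that sequences with only one flagged add are left alone, as in this unsigned sketch mirroring vrhaddu_v4i32_nwop (illustrative only):

  %add = add nuw <4 x i32> %x, %y                              ; only the first add is flagged
  %round = add <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>    ; rounding add has no nuw
  %half = lshr <4 x i32> %round, <i32 1, i32 1, i32 1, i32 1>
  ; stays as vadd/vadd/vshr rather than becoming vrhadd.u32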
@@ -230,3 +230,291 @@ define arm_aapcs_vfpcc <4 x i32> @vhsubu_v4i32_nw(<4 x i32> %x, <4 x i32> %y) {
  %half = lshr <4 x i32> %sub, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %half
}
define arm_aapcs_vfpcc <16 x i8> @vrhadds_v16i8(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: vrhadds_v16i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i8 q0, q0, q1
; CHECK-NEXT:    vmov.i8 q1, #0x1
; CHECK-NEXT:    vadd.i8 q0, q0, q1
; CHECK-NEXT:    vshr.s8 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add <16 x i8> %x, %y
  %round = add <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %half = ashr <16 x i8> %round, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %half
}
define arm_aapcs_vfpcc <16 x i8> @vrhaddu_v16i8(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: vrhaddu_v16i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i8 q0, q0, q1
; CHECK-NEXT:    vmov.i8 q1, #0x1
; CHECK-NEXT:    vadd.i8 q0, q0, q1
; CHECK-NEXT:    vshr.u8 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add <16 x i8> %x, %y
  %round = add <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %half = lshr <16 x i8> %round, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %half
}
define arm_aapcs_vfpcc <8 x i16> @vrhadds_v8i16(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vrhadds_v8i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i16 q0, q0, q1
; CHECK-NEXT:    vmov.i16 q1, #0x1
; CHECK-NEXT:    vadd.i16 q0, q0, q1
; CHECK-NEXT:    vshr.s16 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add <8 x i16> %x, %y
  %round = add <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %half = ashr <8 x i16> %round, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %half
}
define arm_aapcs_vfpcc <8 x i16> @vrhaddu_v8i16(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vrhaddu_v8i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i16 q0, q0, q1
; CHECK-NEXT:    vmov.i16 q1, #0x1
; CHECK-NEXT:    vadd.i16 q0, q0, q1
; CHECK-NEXT:    vshr.u16 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add <8 x i16> %x, %y
  %round = add <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %half = lshr <8 x i16> %round, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %half
}
define arm_aapcs_vfpcc <4 x i32> @vrhadds_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vrhadds_v4i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    vmov.i32 q1, #0x1
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    vshr.s32 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add <4 x i32> %x, %y
  %round = add <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
  %half = ashr <4 x i32> %round, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %half
}
define arm_aapcs_vfpcc <4 x i32> @vrhaddu_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vrhaddu_v4i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    vmov.i32 q1, #0x1
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    vshr.u32 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add <4 x i32> %x, %y
  %round = add <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
  %half = lshr <4 x i32> %round, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %half
}
define arm_aapcs_vfpcc <16 x i8> @vrhadds_v16i8_nwop(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: vrhadds_v16i8_nwop:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i8 q0, q0, q1
; CHECK-NEXT:    vmov.i8 q1, #0x1
; CHECK-NEXT:    vadd.i8 q0, q0, q1
; CHECK-NEXT:    vshr.s8 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add nsw <16 x i8> %x, %y
  %round = add <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %half = ashr <16 x i8> %round, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %half
}
define arm_aapcs_vfpcc <16 x i8> @vrhaddu_v16i8_nwop(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: vrhaddu_v16i8_nwop:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i8 q0, q0, q1
; CHECK-NEXT:    vmov.i8 q1, #0x1
; CHECK-NEXT:    vadd.i8 q0, q0, q1
; CHECK-NEXT:    vshr.u8 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add nuw <16 x i8> %x, %y
  %round = add <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %half = lshr <16 x i8> %round, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %half
}
define arm_aapcs_vfpcc <8 x i16> @vrhadds_v8i16_nwop(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vrhadds_v8i16_nwop:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i16 q0, q0, q1
; CHECK-NEXT:    vmov.i16 q1, #0x1
; CHECK-NEXT:    vadd.i16 q0, q0, q1
; CHECK-NEXT:    vshr.s16 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add nsw <8 x i16> %x, %y
  %round = add <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %half = ashr <8 x i16> %round, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %half
}
define arm_aapcs_vfpcc <8 x i16> @vrhaddu_v8i16_nwop(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vrhaddu_v8i16_nwop:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i16 q0, q0, q1
; CHECK-NEXT:    vmov.i16 q1, #0x1
; CHECK-NEXT:    vadd.i16 q0, q0, q1
; CHECK-NEXT:    vshr.u16 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add nuw <8 x i16> %x, %y
  %round = add <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %half = lshr <8 x i16> %round, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %half
}
define arm_aapcs_vfpcc <4 x i32> @vrhadds_v4i32_nwop(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vrhadds_v4i32_nwop:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    vmov.i32 q1, #0x1
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    vshr.s32 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add nsw <4 x i32> %x, %y
  %round = add <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
  %half = ashr <4 x i32> %round, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %half
}
define arm_aapcs_vfpcc <4 x i32> @vrhaddu_v4i32_nwop(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vrhaddu_v4i32_nwop:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    vmov.i32 q1, #0x1
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    vshr.u32 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add nuw <4 x i32> %x, %y
  %round = add <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
  %half = lshr <4 x i32> %round, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %half
}
define arm_aapcs_vfpcc <16 x i8> @vrhadds_v16i8_nwrnd(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: vrhadds_v16i8_nwrnd:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i8 q0, q0, q1
; CHECK-NEXT:    vmov.i8 q1, #0x1
; CHECK-NEXT:    vhadd.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
  %add = add <16 x i8> %x, %y
  %round = add nsw <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %half = ashr <16 x i8> %round, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %half
}
define arm_aapcs_vfpcc <16 x i8> @vrhaddu_v16i8_nwrnd(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: vrhaddu_v16i8_nwrnd:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i8 q0, q0, q1
; CHECK-NEXT:    vmov.i8 q1, #0x1
; CHECK-NEXT:    vhadd.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
  %add = add <16 x i8> %x, %y
  %round = add nuw <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %half = lshr <16 x i8> %round, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %half
}
define arm_aapcs_vfpcc <8 x i16> @vrhadds_v8i16_nwrnd(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vrhadds_v8i16_nwrnd:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i16 q0, q0, q1
; CHECK-NEXT:    vmov.i16 q1, #0x1
; CHECK-NEXT:    vhadd.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
  %add = add <8 x i16> %x, %y
  %round = add nsw <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %half = ashr <8 x i16> %round, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %half
}
define arm_aapcs_vfpcc <8 x i16> @vrhaddu_v8i16_nwrnd(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vrhaddu_v8i16_nwrnd:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i16 q0, q0, q1
; CHECK-NEXT:    vmov.i16 q1, #0x1
; CHECK-NEXT:    vhadd.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
  %add = add <8 x i16> %x, %y
  %round = add nuw <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %half = lshr <8 x i16> %round, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %half
}
define arm_aapcs_vfpcc <4 x i32> @vrhadds_v4i32_nwrnd(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vrhadds_v4i32_nwrnd:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    vmov.i32 q1, #0x1
; CHECK-NEXT:    vhadd.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
  %add = add <4 x i32> %x, %y
  %round = add nsw <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
  %half = ashr <4 x i32> %round, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %half
}
define arm_aapcs_vfpcc <4 x i32> @vrhaddu_v4i32_nwrnd(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vrhaddu_v4i32_nwrnd:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    vmov.i32 q1, #0x1
; CHECK-NEXT:    vhadd.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
  %add = add <4 x i32> %x, %y
  %round = add nuw <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
  %half = lshr <4 x i32> %round, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %half
}
define arm_aapcs_vfpcc <16 x i8> @vrhadds_v16i8_both_nw(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: vrhadds_v16i8_both_nw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vrhadd.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
  %add = add nsw <16 x i8> %x, %y
  %round = add nsw <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %half = ashr <16 x i8> %round, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %half
}
define arm_aapcs_vfpcc <16 x i8> @vrhaddu_v16i8_both_nw(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: vrhaddu_v16i8_both_nw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vrhadd.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
  %add = add nuw <16 x i8> %x, %y
  %round = add nuw <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %half = lshr <16 x i8> %round, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %half
}
define arm_aapcs_vfpcc <8 x i16> @vrhadds_v8i16_both_nw(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vrhadds_v8i16_both_nw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vrhadd.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
  %add = add nsw <8 x i16> %x, %y
  %round = add nsw <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %half = ashr <8 x i16> %round, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %half
}
define arm_aapcs_vfpcc <8 x i16> @vrhaddu_v8i16_both_nw(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vrhaddu_v8i16_both_nw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vrhadd.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
  %add = add nuw <8 x i16> %x, %y
  %round = add nuw <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %half = lshr <8 x i16> %round, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %half
}
define arm_aapcs_vfpcc <4 x i32> @vrhadds_v4i32_both_nw(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vrhadds_v4i32_both_nw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vrhadd.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
  %add = add nsw <4 x i32> %x, %y
  %round = add nsw <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
  %half = ashr <4 x i32> %round, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %half
}
define arm_aapcs_vfpcc <4 x i32> @vrhaddu_v4i32_both_nw(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vrhaddu_v4i32_both_nw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vrhadd.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
  %add = add nuw <4 x i32> %x, %y
  %round = add nuw <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
  %half = lshr <4 x i32> %round, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %half
}