forked from OSchip/llvm-project

[ARM] Extra MVE select(binop) patterns

This is very similar to 243970d03cace2, but handles a slightly different form of predicated operation. When starting with a pattern of the form select(p, BinOp(x, y), x), InstCombine will often transform this to BinOp(x, select(p, y, identity)), where identity is the identity value of the binop (0 for adds/subs, 1 for muls, -1 for ands, etc). This patch adds the patterns that transform those back into predicated binary operations.

There is also a very minor adjustment to tablegen's null_frag in here, allowing it to also be recognized as a PatLeaf node, so that it can be used in MVE_TwoOpPattern to easily exclude the cases where the alternate transform is not needed.

Differential Revision: https://reviews.llvm.org/D84091

This commit is contained in:
parent 98b56c09be
commit f8abecf337
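For illustration, here is a minimal LLVM IR sketch of the canonicalization this patch reverses (the function and value names are hypothetical, not taken from this commit's tests):

    ; InstCombine rewrites select(p, add(x, y), x) into an add whose
    ; second operand is a select against the identity value (0 for add):
    define <4 x i32> @example(<4 x i32> %x, <4 x i32> %y, <4 x i1> %p) {
      %s = select <4 x i1> %p, <4 x i32> %y, <4 x i32> zeroinitializer
      %r = add <4 x i32> %x, %s
      ret <4 x i32> %r
    }

The new patterns below recognize this shape and select it back to a single predicated instruction.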
@@ -498,6 +498,18 @@ def SubReg_i32_lane : SDNodeXForm<imm, [{
 }]>;

+def ARMimmAllZerosV: PatLeaf<(bitconvert (v4i32 (ARMvmovImm (i32 0))))>;
+def ARMimmAllZerosD: PatLeaf<(bitconvert (v2i32 (ARMvmovImm (i32 0))))>;
+def ARMimmAllOnesV: PatLeaf<(bitconvert (v16i8 (ARMvmovImm (i32 0xEFF))))>;
+def ARMimmAllOnesD: PatLeaf<(bitconvert (v8i8 (ARMvmovImm (i32 0xEFF))))>;
+
+def ARMimmOneV: PatLeaf<(ARMvmovImm (i32 timm)), [{
+  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
+  unsigned EltBits = 0;
+  uint64_t EltVal = ARM_AM::decodeVMOVModImm(ConstVal->getZExtValue(), EltBits);
+  return (EltBits == N->getValueType(0).getScalarSizeInBits() && EltVal == 0x01);
+}]>;
+
 //===----------------------------------------------------------------------===//
 // Operand Definitions.
@@ -318,9 +318,9 @@ def MVE_v2f64 : MVEVectorVTInfo<v2f64, ?, v4i1, ?, 0b11, "f", ?>;
 def MVE_v16p8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, v8i1, 0b11, "p", 0b0>;
 def MVE_v8p16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, v4i1, 0b11, "p", 0b1>;

 multiclass MVE_TwoOpPattern<MVEVectorVTInfo VTI, PatFrag Op, Intrinsic PredInt,
-                            dag PredOperands, Instruction Inst> {
+                            dag PredOperands, Instruction Inst,
+                            SDPatternOperator IdentityVec = null_frag> {
   // Unpredicated
   def : Pat<(VTI.Vec (Op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
             (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
@@ -334,6 +334,15 @@ multiclass MVE_TwoOpPattern<MVEVectorVTInfo VTI, PatFrag Op, Intrinsic PredInt,
             (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
                            ARMVCCThen, (VTI.Pred VCCR:$mask),
                            (VTI.Vec MQPR:$inactive)))>;
+
+    // Optionally with the select folded through the op
+    def : Pat<(VTI.Vec (Op (VTI.Vec MQPR:$Qm),
+                           (VTI.Vec (vselect (VTI.Pred VCCR:$mask),
+                                             (VTI.Vec MQPR:$Qn),
+                                             (VTI.Vec IdentityVec))))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+                             ARMVCCThen, (VTI.Pred VCCR:$mask),
+                             (VTI.Vec MQPR:$Qm)))>;
   }

   // Predicated with intrinsic
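To make the new pattern concrete, here is an LLVM IR sketch of the kind of input it catches, modelled on the mve-pred-selectop tests further down (the select/add body is my reconstruction; the test bodies are truncated on this page):

    define arm_aapcs_vfpcc <4 x i32> @add_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
    entry:
      ; lanes enabled by the VCTP predicate take x + y; disabled lanes keep x
      %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
      %s = select <4 x i1> %c, <4 x i32> %y, <4 x i32> zeroinitializer
      %a = add <4 x i32> %x, %s
      ret <4 x i32> %a
    }
    declare <4 x i1> @llvm.arm.mve.vctp32(i32)

With this patch the whole sequence selects to vctp.32 / vpst / vaddt.i32 (see the updated CHECK lines below) instead of materializing a zero vector and going through vpsel.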
@@ -346,7 +355,8 @@ multiclass MVE_TwoOpPattern<MVEVectorVTInfo VTI, PatFrag Op, Intrinsic PredInt,
 }

 multiclass MVE_TwoOpPatternDup<MVEVectorVTInfo VTI, PatFrag Op, Intrinsic PredInt,
-                               dag PredOperands, Instruction Inst> {
+                               dag PredOperands, Instruction Inst,
+                               SDPatternOperator IdentityVec = null_frag> {
   // Unpredicated
   def : Pat<(VTI.Vec (Op (VTI.Vec MQPR:$Qm), (VTI.Vec (ARMvdup rGPR:$Rn)))),
             (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), rGPR:$Rn))>;
@@ -360,6 +370,15 @@ multiclass MVE_TwoOpPatternDup<MVEVectorVTInfo VTI, PatFrag Op, Intrinsic PredIn
             (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), rGPR:$Rn,
                            ARMVCCThen, (VTI.Pred VCCR:$mask),
                            (VTI.Vec MQPR:$inactive)))>;
+
+    // Optionally with the select folded through the op
+    def : Pat<(VTI.Vec (Op (VTI.Vec MQPR:$Qm),
+                           (VTI.Vec (vselect (VTI.Pred VCCR:$mask),
+                                             (ARMvdup rGPR:$Rn),
+                                             (VTI.Vec IdentityVec))))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), rGPR:$Rn,
+                             ARMVCCThen, (VTI.Pred VCCR:$mask),
+                             (VTI.Vec MQPR:$Qm)))>;
   }

   // Predicated with intrinsic
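The Dup variant handles the same fold when the selected operand is a scalar splat. A hedged IR sketch (again a reconstruction, not the literal test body):

    define arm_aapcs_vfpcc <4 x i32> @addqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) {
    entry:
      %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
      ; splat of the scalar operand, matched by ARMvdup
      %i = insertelement <4 x i32> undef, i32 %y, i32 0
      %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
      %s = select <4 x i1> %c, <4 x i32> %ys, <4 x i32> zeroinitializer
      %a = add <4 x i32> %x, %s
      ret <4 x i32> %a
    }
    declare <4 x i1> @llvm.arm.mve.vctp32(i32)

This should now select to vctp.32 / vpst / vaddt.i32 q0, q0, r0, with no explicit dup or zero vector.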
@@ -1492,20 +1511,20 @@ foreach s=["s8", "s16", "s32", "u8", "u16", "u32", "i8", "i16", "i32", "f16", "f
 }

 let Predicates = [HasMVEInt] in {
-  defm : MVE_TwoOpPattern<MVE_v16i8, and, int_arm_mve_and_predicated, (? ), MVE_VAND>;
-  defm : MVE_TwoOpPattern<MVE_v8i16, and, int_arm_mve_and_predicated, (? ), MVE_VAND>;
-  defm : MVE_TwoOpPattern<MVE_v4i32, and, int_arm_mve_and_predicated, (? ), MVE_VAND>;
-  defm : MVE_TwoOpPattern<MVE_v2i64, and, int_arm_mve_and_predicated, (? ), MVE_VAND>;
+  defm : MVE_TwoOpPattern<MVE_v16i8, and, int_arm_mve_and_predicated, (? ), MVE_VAND, ARMimmAllOnesV>;
+  defm : MVE_TwoOpPattern<MVE_v8i16, and, int_arm_mve_and_predicated, (? ), MVE_VAND, ARMimmAllOnesV>;
+  defm : MVE_TwoOpPattern<MVE_v4i32, and, int_arm_mve_and_predicated, (? ), MVE_VAND, ARMimmAllOnesV>;
+  defm : MVE_TwoOpPattern<MVE_v2i64, and, int_arm_mve_and_predicated, (? ), MVE_VAND, ARMimmAllOnesV>;

-  defm : MVE_TwoOpPattern<MVE_v16i8, or, int_arm_mve_orr_predicated, (? ), MVE_VORR>;
-  defm : MVE_TwoOpPattern<MVE_v8i16, or, int_arm_mve_orr_predicated, (? ), MVE_VORR>;
-  defm : MVE_TwoOpPattern<MVE_v4i32, or, int_arm_mve_orr_predicated, (? ), MVE_VORR>;
-  defm : MVE_TwoOpPattern<MVE_v2i64, or, int_arm_mve_orr_predicated, (? ), MVE_VORR>;
+  defm : MVE_TwoOpPattern<MVE_v16i8, or, int_arm_mve_orr_predicated, (? ), MVE_VORR, ARMimmAllZerosV>;
+  defm : MVE_TwoOpPattern<MVE_v8i16, or, int_arm_mve_orr_predicated, (? ), MVE_VORR, ARMimmAllZerosV>;
+  defm : MVE_TwoOpPattern<MVE_v4i32, or, int_arm_mve_orr_predicated, (? ), MVE_VORR, ARMimmAllZerosV>;
+  defm : MVE_TwoOpPattern<MVE_v2i64, or, int_arm_mve_orr_predicated, (? ), MVE_VORR, ARMimmAllZerosV>;

-  defm : MVE_TwoOpPattern<MVE_v16i8, xor, int_arm_mve_eor_predicated, (? ), MVE_VEOR>;
-  defm : MVE_TwoOpPattern<MVE_v8i16, xor, int_arm_mve_eor_predicated, (? ), MVE_VEOR>;
-  defm : MVE_TwoOpPattern<MVE_v4i32, xor, int_arm_mve_eor_predicated, (? ), MVE_VEOR>;
-  defm : MVE_TwoOpPattern<MVE_v2i64, xor, int_arm_mve_eor_predicated, (? ), MVE_VEOR>;
+  defm : MVE_TwoOpPattern<MVE_v16i8, xor, int_arm_mve_eor_predicated, (? ), MVE_VEOR, ARMimmAllZerosV>;
+  defm : MVE_TwoOpPattern<MVE_v8i16, xor, int_arm_mve_eor_predicated, (? ), MVE_VEOR, ARMimmAllZerosV>;
+  defm : MVE_TwoOpPattern<MVE_v4i32, xor, int_arm_mve_eor_predicated, (? ), MVE_VEOR, ARMimmAllZerosV>;
+  defm : MVE_TwoOpPattern<MVE_v2i64, xor, int_arm_mve_eor_predicated, (? ), MVE_VEOR, ARMimmAllZerosV>;

   defm : MVE_TwoOpPattern<MVE_v16i8, BinOpFrag<(and node:$LHS, (vnotq node:$RHS))>,
                           int_arm_mve_bic_predicated, (? ), MVE_VBIC>;
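The extra operand names the identity constant that InstCombine folds into the select arm, so each PatLeaf only has to match that one constant: all-ones for and, zero for orr/eor. A small IR sketch of the and case (function name hypothetical):

    define <4 x i32> @and_identity(<4 x i32> %x, <4 x i32> %y, <4 x i1> %p) {
      ; and(x, select(p, y, -1)) is equivalent to select(p, and(x, y), x)
      %s = select <4 x i1> %p, <4 x i32> %y, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
      %r = and <4 x i32> %x, %s
      ret <4 x i32> %r
    }

Note that VBIC keeps the default null_frag identity, so the alternate transform is simply not generated for it.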
@@ -1775,7 +1794,7 @@ multiclass MVE_VMUL_m<MVEVectorVTInfo VTI> {

   let Predicates = [HasMVEInt] in {
     defm : MVE_TwoOpPattern<VTI, mul, int_arm_mve_mul_predicated, (? ),
-                            !cast<Instruction>(NAME)>;
+                            !cast<Instruction>(NAME), ARMimmOneV>;
   }
 }
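For multiplies the identity is a splat of 1, which is exactly what the ARMimmOneV PatLeaf above checks for. A sketch of the mul shape (hypothetical name):

    define <4 x i32> @mul_identity(<4 x i32> %x, <4 x i32> %y, <4 x i1> %p) {
      ; mul(x, select(p, y, 1)) is equivalent to select(p, mul(x, y), x)
      %s = select <4 x i1> %p, <4 x i32> %y, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
      %r = mul <4 x i32> %x, %s
      ret <4 x i32> %r
    }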
@@ -1849,7 +1868,7 @@ multiclass MVE_VADDSUB_m<string iname, MVEVectorVTInfo VTI, bit subtract,
   defvar Inst = !cast<Instruction>(NAME);

   let Predicates = [HasMVEInt] in {
-    defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? ), !cast<Instruction>(NAME)>;
+    defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? ), !cast<Instruction>(NAME), ARMimmAllZerosV>;
   }
 }
@@ -4984,7 +5003,7 @@ multiclass MVE_VADDSUB_qr_m<string iname, MVEVectorVTInfo VTI, bit subtract,
                             SDNode Op, Intrinsic PredInt> {
   def "" : MVE_VADDSUB_qr<iname, VTI.Suffix, VTI.Size, 0b0, subtract, 0b1, 0b0>;
   let Predicates = [HasMVEInt] in {
-    defm : MVE_TwoOpPatternDup<VTI, Op, PredInt, (? ), !cast<Instruction>(NAME)>;
+    defm : MVE_TwoOpPatternDup<VTI, Op, PredInt, (? ), !cast<Instruction>(NAME), ARMimmAllZerosV>;
   }
 }
@@ -5270,7 +5289,7 @@ class MVE_VMUL_qr_int<string iname, string suffix, bits<2> size>
 multiclass MVE_VMUL_qr_int_m<MVEVectorVTInfo VTI> {
   def "" : MVE_VMUL_qr_int<"vmul", VTI.Suffix, VTI.Size>;
   defm : MVE_TwoOpPatternDup<VTI, mul, int_arm_mve_mul_predicated, (? ),
-                             !cast<Instruction>(NAME)>;
+                             !cast<Instruction>(NAME), ARMimmOneV>;
 }

 defm MVE_VMUL_qr_i8 : MVE_VMUL_qr_int_m<MVE_v16i8>;
@@ -6864,7 +6883,7 @@ class MVE_vector_load_typed<ValueType Ty, Instruction RegImmInst,

 class MVE_vector_maskedload_typed<ValueType Ty, Instruction RegImmInst,
                                   PatFrag LoadKind, int shift>
-  : Pat<(Ty (LoadKind t2addrmode_imm7<shift>:$addr, VCCR:$pred, (Ty NEONimmAllZerosV))),
+  : Pat<(Ty (LoadKind t2addrmode_imm7<shift>:$addr, VCCR:$pred, (Ty (ARMvmovImm (i32 0))))),
         (Ty (RegImmInst t2addrmode_imm7<shift>:$addr, ARMVCCThen, VCCR:$pred))>;

 multiclass MVE_vector_load<Instruction RegImmInst, PatFrag LoadKind,
@@ -7031,11 +7050,11 @@ multiclass MVEExtLoadStore<Instruction LoadSInst, Instruction LoadUInst, string
             (VT (LoadUInst taddrmode_imm7<Shift>:$addr))>;

   // Masked ext loads
-  def : Pat<(VT (!cast<PatFrag>("aligned_extmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT NEONimmAllZerosV))),
+  def : Pat<(VT (!cast<PatFrag>("aligned_extmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT (ARMvmovImm (i32 0))))),
             (VT (LoadUInst taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred))>;
-  def : Pat<(VT (!cast<PatFrag>("aligned_sextmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT NEONimmAllZerosV))),
+  def : Pat<(VT (!cast<PatFrag>("aligned_sextmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT (ARMvmovImm (i32 0))))),
             (VT (LoadSInst taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred))>;
-  def : Pat<(VT (!cast<PatFrag>("aligned_zextmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT NEONimmAllZerosV))),
+  def : Pat<(VT (!cast<PatFrag>("aligned_zextmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT (ARMvmovImm (i32 0))))),
             (VT (LoadUInst taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred))>;
 }
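These masked-load patterns fire when the passthru operand is the all-zeros vector, now matched directly as an ARMvmovImm of 0 instead of the removed NEONimmAllZerosV leaf. A minimal IR sketch of a load they apply to (function name hypothetical; typed-pointer syntax of this LLVM vintage):

    define arm_aapcs_vfpcc <4 x i32> @masked_load_zero(<4 x i32>* %p, <4 x i1> %mask) {
      ; zero passthru lets this select straight to a predicated VLDR
      %l = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %p, i32 4, <4 x i1> %mask, <4 x i32> zeroinitializer)
      ret <4 x i32> %l
    }
    declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)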
@@ -534,20 +534,6 @@ def NEONvtbl1 : SDNode<"ARMISD::VTBL1", SDTARMVTBL1>;
 def NEONvtbl2 : SDNode<"ARMISD::VTBL2", SDTARMVTBL2>;

-def NEONimmAllZerosV: PatLeaf<(ARMvmovImm (i32 timm)), [{
-  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
-  unsigned EltBits = 0;
-  uint64_t EltVal = ARM_AM::decodeVMOVModImm(ConstVal->getZExtValue(), EltBits);
-  return (EltBits == 32 && EltVal == 0);
-}]>;
-
-def NEONimmAllOnesV: PatLeaf<(ARMvmovImm (i32 timm)), [{
-  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
-  unsigned EltBits = 0;
-  uint64_t EltVal = ARM_AM::decodeVMOVModImm(ConstVal->getZExtValue(), EltBits);
-  return (EltBits == 8 && EltVal == 0xff);
-}]>;
-
 //===----------------------------------------------------------------------===//
 // NEON load / store instructions
 //===----------------------------------------------------------------------===//
@@ -5273,9 +5259,9 @@ def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
 // Vector Bitwise Operations.

 def vnotd : PatFrag<(ops node:$in),
-                    (xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>;
+                    (xor node:$in, ARMimmAllOnesD)>;
 def vnotq : PatFrag<(ops node:$in),
-                    (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>;
+                    (xor node:$in, ARMimmAllOnesV)>;

 // VAND : Vector Bitwise AND
@@ -6054,9 +6040,9 @@ defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
 // Vector Negate.

 def vnegd : PatFrag<(ops node:$in),
-                    (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>;
+                    (sub ARMimmAllZerosD, node:$in)>;
 def vnegq : PatFrag<(ops node:$in),
-                    (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;
+                    (sub ARMimmAllZerosV, node:$in)>;

 class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
   : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm),
@@ -6270,11 +6256,11 @@ defm : NEONImmReplicateInstAlias<i32, VMOVv2i32, VMOVv4i32,

 let AddedComplexity = 50, isAsCheapAsAMove = 1, isReMaterializable = 1 in {
 def VMOVD0 : ARMPseudoExpand<(outs DPR:$Vd), (ins), 4, IIC_VMOVImm,
-                             [(set DPR:$Vd, (v2i32 NEONimmAllZerosV))],
+                             [(set DPR:$Vd, (v2i32 ARMimmAllZerosD))],
                              (VMOVv2i32 DPR:$Vd, 0, (ops 14, zero_reg))>,
              Requires<[HasZCZ]>;
 def VMOVQ0 : ARMPseudoExpand<(outs QPR:$Vd), (ins), 4, IIC_VMOVImm,
-                             [(set QPR:$Vd, (v4i32 NEONimmAllZerosV))],
+                             [(set QPR:$Vd, (v4i32 ARMimmAllZerosV))],
                              (VMOVv4i32 QPR:$Vd, 0, (ops 14, zero_reg))>,
              Requires<[HasZCZ]>;
 }
@@ -4,10 +4,9 @@
 define arm_aapcs_vfpcc <4 x i32> @add_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
 ; CHECK-LABEL: add_v4i32_x:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q2, #0x0
 ; CHECK-NEXT:    vctp.32 r0
-; CHECK-NEXT:    vpsel q1, q1, q2
-; CHECK-NEXT:    vadd.i32 q0, q1, q0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vaddt.i32 q0, q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
@@ -19,10 +18,9 @@
 define arm_aapcs_vfpcc <8 x i16> @add_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
 ; CHECK-LABEL: add_v8i16_x:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q2, #0x0
 ; CHECK-NEXT:    vctp.16 r0
-; CHECK-NEXT:    vpsel q1, q1, q2
-; CHECK-NEXT:    vadd.i16 q0, q1, q0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vaddt.i16 q0, q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
@@ -34,10 +32,9 @@
 define arm_aapcs_vfpcc <16 x i8> @add_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
 ; CHECK-LABEL: add_v16i8_x:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q2, #0x0
 ; CHECK-NEXT:    vctp.8 r0
-; CHECK-NEXT:    vpsel q1, q1, q2
-; CHECK-NEXT:    vadd.i8 q0, q1, q0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vaddt.i8 q0, q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
@@ -49,10 +46,9 @@
 define arm_aapcs_vfpcc <4 x i32> @sub_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
 ; CHECK-LABEL: sub_v4i32_x:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q2, #0x0
 ; CHECK-NEXT:    vctp.32 r0
-; CHECK-NEXT:    vpsel q1, q1, q2
-; CHECK-NEXT:    vsub.i32 q0, q0, q1
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vsubt.i32 q0, q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
@@ -64,10 +60,9 @@
 define arm_aapcs_vfpcc <8 x i16> @sub_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
 ; CHECK-LABEL: sub_v8i16_x:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q2, #0x0
 ; CHECK-NEXT:    vctp.16 r0
-; CHECK-NEXT:    vpsel q1, q1, q2
-; CHECK-NEXT:    vsub.i16 q0, q0, q1
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vsubt.i16 q0, q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
@@ -79,10 +74,9 @@
 define arm_aapcs_vfpcc <16 x i8> @sub_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
 ; CHECK-LABEL: sub_v16i8_x:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q2, #0x0
 ; CHECK-NEXT:    vctp.8 r0
-; CHECK-NEXT:    vpsel q1, q1, q2
-; CHECK-NEXT:    vsub.i8 q0, q0, q1
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vsubt.i8 q0, q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
@@ -94,10 +88,9 @@
 define arm_aapcs_vfpcc <4 x i32> @mul_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
 ; CHECK-LABEL: mul_v4i32_x:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q2, #0x1
 ; CHECK-NEXT:    vctp.32 r0
-; CHECK-NEXT:    vpsel q1, q1, q2
-; CHECK-NEXT:    vmul.i32 q0, q1, q0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vmult.i32 q0, q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
@@ -109,10 +102,9 @@
 define arm_aapcs_vfpcc <8 x i16> @mul_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
 ; CHECK-LABEL: mul_v8i16_x:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i16 q2, #0x1
 ; CHECK-NEXT:    vctp.16 r0
-; CHECK-NEXT:    vpsel q1, q1, q2
-; CHECK-NEXT:    vmul.i16 q0, q1, q0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vmult.i16 q0, q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
@@ -124,10 +116,9 @@
 define arm_aapcs_vfpcc <16 x i8> @mul_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
 ; CHECK-LABEL: mul_v16i8_x:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i8 q2, #0x1
 ; CHECK-NEXT:    vctp.8 r0
-; CHECK-NEXT:    vpsel q1, q1, q2
-; CHECK-NEXT:    vmul.i8 q0, q1, q0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vmult.i8 q0, q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
@@ -139,10 +130,9 @@
 define arm_aapcs_vfpcc <4 x i32> @and_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
 ; CHECK-LABEL: and_v4i32_x:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i8 q2, #0xff
 ; CHECK-NEXT:    vctp.32 r0
-; CHECK-NEXT:    vpsel q1, q1, q2
-; CHECK-NEXT:    vand q0, q1, q0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vandt q0, q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
@@ -154,10 +144,9 @@
 define arm_aapcs_vfpcc <8 x i16> @and_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
 ; CHECK-LABEL: and_v8i16_x:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i8 q2, #0xff
 ; CHECK-NEXT:    vctp.16 r0
-; CHECK-NEXT:    vpsel q1, q1, q2
-; CHECK-NEXT:    vand q0, q1, q0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vandt q0, q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
@@ -169,10 +158,9 @@
 define arm_aapcs_vfpcc <16 x i8> @and_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
 ; CHECK-LABEL: and_v16i8_x:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i8 q2, #0xff
 ; CHECK-NEXT:    vctp.8 r0
-; CHECK-NEXT:    vpsel q1, q1, q2
-; CHECK-NEXT:    vand q0, q1, q0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vandt q0, q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
@@ -184,10 +172,9 @@
 define arm_aapcs_vfpcc <4 x i32> @or_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
 ; CHECK-LABEL: or_v4i32_x:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q2, #0x0
 ; CHECK-NEXT:    vctp.32 r0
-; CHECK-NEXT:    vpsel q1, q1, q2
-; CHECK-NEXT:    vorr q0, q1, q0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vorrt q0, q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
@@ -199,10 +186,9 @@
 define arm_aapcs_vfpcc <8 x i16> @or_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
 ; CHECK-LABEL: or_v8i16_x:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q2, #0x0
 ; CHECK-NEXT:    vctp.16 r0
-; CHECK-NEXT:    vpsel q1, q1, q2
-; CHECK-NEXT:    vorr q0, q1, q0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vorrt q0, q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
@@ -214,10 +200,9 @@
 define arm_aapcs_vfpcc <16 x i8> @or_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
 ; CHECK-LABEL: or_v16i8_x:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q2, #0x0
 ; CHECK-NEXT:    vctp.8 r0
-; CHECK-NEXT:    vpsel q1, q1, q2
-; CHECK-NEXT:    vorr q0, q1, q0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vorrt q0, q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
@@ -229,10 +214,9 @@
 define arm_aapcs_vfpcc <4 x i32> @xor_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
 ; CHECK-LABEL: xor_v4i32_x:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q2, #0x0
 ; CHECK-NEXT:    vctp.32 r0
-; CHECK-NEXT:    vpsel q1, q1, q2
-; CHECK-NEXT:    veor q0, q1, q0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    veort q0, q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
@@ -244,10 +228,9 @@
 define arm_aapcs_vfpcc <8 x i16> @xor_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
 ; CHECK-LABEL: xor_v8i16_x:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q2, #0x0
 ; CHECK-NEXT:    vctp.16 r0
-; CHECK-NEXT:    vpsel q1, q1, q2
-; CHECK-NEXT:    veor q0, q1, q0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    veort q0, q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
@@ -259,10 +242,9 @@
 define arm_aapcs_vfpcc <16 x i8> @xor_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
 ; CHECK-LABEL: xor_v16i8_x:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q2, #0x0
 ; CHECK-NEXT:    vctp.8 r0
-; CHECK-NEXT:    vpsel q1, q1, q2
-; CHECK-NEXT:    veor q0, q1, q0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    veort q0, q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
@@ -274,11 +256,10 @@
 define arm_aapcs_vfpcc <4 x i32> @andnot_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
 ; CHECK-LABEL: andnot_v4i32_x:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i8 q2, #0xff
 ; CHECK-NEXT:    vmvn q1, q1
 ; CHECK-NEXT:    vctp.32 r0
 ; CHECK-NEXT:    vpst
-; CHECK-NEXT:    veort q2, q1, q2
-; CHECK-NEXT:    vand q0, q2, q0
+; CHECK-NEXT:    vandt q0, q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
@@ -291,11 +272,10 @@
 define arm_aapcs_vfpcc <8 x i16> @andnot_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
 ; CHECK-LABEL: andnot_v8i16_x:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i8 q2, #0xff
 ; CHECK-NEXT:    vmvn q1, q1
 ; CHECK-NEXT:    vctp.16 r0
 ; CHECK-NEXT:    vpst
-; CHECK-NEXT:    veort q2, q1, q2
-; CHECK-NEXT:    vand q0, q2, q0
+; CHECK-NEXT:    vandt q0, q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
@@ -308,11 +288,10 @@
 define arm_aapcs_vfpcc <16 x i8> @andnot_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
 ; CHECK-LABEL: andnot_v16i8_x:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i8 q2, #0xff
 ; CHECK-NEXT:    vmvn q1, q1
 ; CHECK-NEXT:    vctp.8 r0
 ; CHECK-NEXT:    vpst
-; CHECK-NEXT:    veort q2, q1, q2
-; CHECK-NEXT:    vand q0, q2, q0
+; CHECK-NEXT:    vandt q0, q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
@@ -325,12 +304,10 @@
 define arm_aapcs_vfpcc <4 x i32> @ornot_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
 ; CHECK-LABEL: ornot_v4i32_x:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q2, #0x0
-; CHECK-NEXT:    vmov.i8 q3, #0xff
 ; CHECK-NEXT:    vmvn q1, q1
 ; CHECK-NEXT:    vctp.32 r0
 ; CHECK-NEXT:    vpst
-; CHECK-NEXT:    veort q2, q1, q3
-; CHECK-NEXT:    vorr q0, q2, q0
+; CHECK-NEXT:    vorrt q0, q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
@@ -343,12 +320,10 @@
 define arm_aapcs_vfpcc <8 x i16> @ornot_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
 ; CHECK-LABEL: ornot_v8i16_x:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q2, #0x0
-; CHECK-NEXT:    vmov.i8 q3, #0xff
 ; CHECK-NEXT:    vmvn q1, q1
 ; CHECK-NEXT:    vctp.16 r0
 ; CHECK-NEXT:    vpst
-; CHECK-NEXT:    veort q2, q1, q3
-; CHECK-NEXT:    vorr q0, q2, q0
+; CHECK-NEXT:    vorrt q0, q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
@@ -361,12 +336,10 @@
 define arm_aapcs_vfpcc <16 x i8> @ornot_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
 ; CHECK-LABEL: ornot_v16i8_x:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q2, #0x0
-; CHECK-NEXT:    vmov.i8 q3, #0xff
 ; CHECK-NEXT:    vmvn q1, q1
 ; CHECK-NEXT:    vctp.8 r0
 ; CHECK-NEXT:    vpst
-; CHECK-NEXT:    veort q2, q1, q3
-; CHECK-NEXT:    vorr q0, q2, q0
+; CHECK-NEXT:    vorrt q0, q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
@@ -871,11 +844,9 @@
 define arm_aapcs_vfpcc <4 x i32> @addqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) {
 ; CHECK-LABEL: addqr_v4i32_x:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q1, #0x0
 ; CHECK-NEXT:    vctp.32 r1
 ; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vdupt.32 q1, r0
-; CHECK-NEXT:    vadd.i32 q0, q1, q0
+; CHECK-NEXT:    vaddt.i32 q0, q0, r0
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
@@ -889,11 +860,9 @@
 define arm_aapcs_vfpcc <8 x i16> @addqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) {
 ; CHECK-LABEL: addqr_v8i16_x:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q1, #0x0
 ; CHECK-NEXT:    vctp.16 r1
 ; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vdupt.16 q1, r0
-; CHECK-NEXT:    vadd.i16 q0, q1, q0
+; CHECK-NEXT:    vaddt.i16 q0, q0, r0
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
@@ -907,11 +876,9 @@
 define arm_aapcs_vfpcc <16 x i8> @addqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) {
 ; CHECK-LABEL: addqr_v16i8_x:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q1, #0x0
 ; CHECK-NEXT:    vctp.8 r1
 ; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vdupt.8 q1, r0
-; CHECK-NEXT:    vadd.i8 q0, q1, q0
+; CHECK-NEXT:    vaddt.i8 q0, q0, r0
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
@@ -925,11 +892,9 @@
 define arm_aapcs_vfpcc <4 x i32> @subqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) {
 ; CHECK-LABEL: subqr_v4i32_x:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q1, #0x0
 ; CHECK-NEXT:    vctp.32 r1
 ; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vdupt.32 q1, r0
-; CHECK-NEXT:    vsub.i32 q0, q0, q1
+; CHECK-NEXT:    vsubt.i32 q0, q0, r0
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
@@ -943,11 +908,9 @@
 define arm_aapcs_vfpcc <8 x i16> @subqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) {
 ; CHECK-LABEL: subqr_v8i16_x:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q1, #0x0
 ; CHECK-NEXT:    vctp.16 r1
 ; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vdupt.16 q1, r0
-; CHECK-NEXT:    vsub.i16 q0, q0, q1
+; CHECK-NEXT:    vsubt.i16 q0, q0, r0
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
@@ -961,11 +924,9 @@
 define arm_aapcs_vfpcc <16 x i8> @subqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) {
 ; CHECK-LABEL: subqr_v16i8_x:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q1, #0x0
 ; CHECK-NEXT:    vctp.8 r1
 ; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vdupt.8 q1, r0
-; CHECK-NEXT:    vsub.i8 q0, q0, q1
+; CHECK-NEXT:    vsubt.i8 q0, q0, r0
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
@@ -979,11 +940,9 @@
 define arm_aapcs_vfpcc <4 x i32> @mulqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) {
 ; CHECK-LABEL: mulqr_v4i32_x:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q1, #0x1
 ; CHECK-NEXT:    vctp.32 r1
 ; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vdupt.32 q1, r0
-; CHECK-NEXT:    vmul.i32 q0, q1, q0
+; CHECK-NEXT:    vmult.i32 q0, q0, r0
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
@@ -997,11 +956,9 @@
 define arm_aapcs_vfpcc <8 x i16> @mulqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) {
 ; CHECK-LABEL: mulqr_v8i16_x:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i16 q1, #0x1
 ; CHECK-NEXT:    vctp.16 r1
 ; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vdupt.16 q1, r0
-; CHECK-NEXT:    vmul.i16 q0, q1, q0
+; CHECK-NEXT:    vmult.i16 q0, q0, r0
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
@@ -1015,11 +972,9 @@
 define arm_aapcs_vfpcc <16 x i8> @mulqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) {
 ; CHECK-LABEL: mulqr_v16i8_x:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i8 q1, #0x1
 ; CHECK-NEXT:    vctp.8 r1
 ; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vdupt.8 q1, r0
-; CHECK-NEXT:    vmul.i8 q0, q1, q0
+; CHECK-NEXT:    vmult.i8 q0, q0, r0
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
@@ -1327,10 +1282,10 @@
 define arm_aapcs_vfpcc <4 x i32> @add_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
 ; CHECK-LABEL: add_v4i32_y:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q2, #0x0
 ; CHECK-NEXT:    vctp.32 r0
-; CHECK-NEXT:    vpsel q0, q0, q2
-; CHECK-NEXT:    vadd.i32 q0, q0, q1
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vaddt.i32 q1, q1, q0
+; CHECK-NEXT:    vmov q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
@@ -1342,10 +1297,10 @@
 define arm_aapcs_vfpcc <8 x i16> @add_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
 ; CHECK-LABEL: add_v8i16_y:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q2, #0x0
 ; CHECK-NEXT:    vctp.16 r0
-; CHECK-NEXT:    vpsel q0, q0, q2
-; CHECK-NEXT:    vadd.i16 q0, q0, q1
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vaddt.i16 q1, q1, q0
+; CHECK-NEXT:    vmov q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
@@ -1357,10 +1312,10 @@
 define arm_aapcs_vfpcc <16 x i8> @add_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
 ; CHECK-LABEL: add_v16i8_y:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q2, #0x0
 ; CHECK-NEXT:    vctp.8 r0
-; CHECK-NEXT:    vpsel q0, q0, q2
-; CHECK-NEXT:    vadd.i8 q0, q0, q1
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vaddt.i8 q1, q1, q0
+; CHECK-NEXT:    vmov q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
@@ -1417,10 +1372,10 @@
 define arm_aapcs_vfpcc <4 x i32> @mul_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
 ; CHECK-LABEL: mul_v4i32_y:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q2, #0x1
 ; CHECK-NEXT:    vctp.32 r0
-; CHECK-NEXT:    vpsel q0, q0, q2
-; CHECK-NEXT:    vmul.i32 q0, q0, q1
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vmult.i32 q1, q1, q0
+; CHECK-NEXT:    vmov q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
@@ -1432,10 +1387,10 @@
 define arm_aapcs_vfpcc <8 x i16> @mul_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
 ; CHECK-LABEL: mul_v8i16_y:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i16 q2, #0x1
 ; CHECK-NEXT:    vctp.16 r0
-; CHECK-NEXT:    vpsel q0, q0, q2
-; CHECK-NEXT:    vmul.i16 q0, q0, q1
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vmult.i16 q1, q1, q0
+; CHECK-NEXT:    vmov q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
@@ -1447,10 +1402,10 @@
 define arm_aapcs_vfpcc <16 x i8> @mul_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
 ; CHECK-LABEL: mul_v16i8_y:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i8 q2, #0x1
 ; CHECK-NEXT:    vctp.8 r0
-; CHECK-NEXT:    vpsel q0, q0, q2
-; CHECK-NEXT:    vmul.i8 q0, q0, q1
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vmult.i8 q1, q1, q0
+; CHECK-NEXT:    vmov q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
@@ -1462,10 +1417,10 @@
 define arm_aapcs_vfpcc <4 x i32> @and_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
 ; CHECK-LABEL: and_v4i32_y:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i8 q2, #0xff
 ; CHECK-NEXT:    vctp.32 r0
-; CHECK-NEXT:    vpsel q0, q0, q2
-; CHECK-NEXT:    vand q0, q0, q1
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vandt q1, q1, q0
+; CHECK-NEXT:    vmov q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
@@ -1477,10 +1432,10 @@
 define arm_aapcs_vfpcc <8 x i16> @and_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
 ; CHECK-LABEL: and_v8i16_y:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i8 q2, #0xff
 ; CHECK-NEXT:    vctp.16 r0
-; CHECK-NEXT:    vpsel q0, q0, q2
-; CHECK-NEXT:    vand q0, q0, q1
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vandt q1, q1, q0
+; CHECK-NEXT:    vmov q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
@@ -1492,10 +1447,10 @@
 define arm_aapcs_vfpcc <16 x i8> @and_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
 ; CHECK-LABEL: and_v16i8_y:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i8 q2, #0xff
 ; CHECK-NEXT:    vctp.8 r0
-; CHECK-NEXT:    vpsel q0, q0, q2
-; CHECK-NEXT:    vand q0, q0, q1
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vandt q1, q1, q0
+; CHECK-NEXT:    vmov q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
@@ -1507,10 +1462,10 @@
 define arm_aapcs_vfpcc <4 x i32> @or_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
 ; CHECK-LABEL: or_v4i32_y:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q2, #0x0
 ; CHECK-NEXT:    vctp.32 r0
-; CHECK-NEXT:    vpsel q0, q0, q2
-; CHECK-NEXT:    vorr q0, q0, q1
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vorrt q1, q1, q0
+; CHECK-NEXT:    vmov q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
@@ -1522,10 +1477,10 @@
 define arm_aapcs_vfpcc <8 x i16> @or_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
 ; CHECK-LABEL: or_v8i16_y:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q2, #0x0
 ; CHECK-NEXT:    vctp.16 r0
-; CHECK-NEXT:    vpsel q0, q0, q2
-; CHECK-NEXT:    vorr q0, q0, q1
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vorrt q1, q1, q0
+; CHECK-NEXT:    vmov q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
@@ -1537,10 +1492,10 @@
 define arm_aapcs_vfpcc <16 x i8> @or_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
 ; CHECK-LABEL: or_v16i8_y:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q2, #0x0
 ; CHECK-NEXT:    vctp.8 r0
-; CHECK-NEXT:    vpsel q0, q0, q2
-; CHECK-NEXT:    vorr q0, q0, q1
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vorrt q1, q1, q0
+; CHECK-NEXT:    vmov q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
@@ -1552,10 +1507,10 @@
 define arm_aapcs_vfpcc <4 x i32> @xor_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
 ; CHECK-LABEL: xor_v4i32_y:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q2, #0x0
 ; CHECK-NEXT:    vctp.32 r0
-; CHECK-NEXT:    vpsel q0, q0, q2
-; CHECK-NEXT:    veor q0, q0, q1
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    veort q1, q1, q0
+; CHECK-NEXT:    vmov q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
@@ -1567,10 +1522,10 @@
 define arm_aapcs_vfpcc <8 x i16> @xor_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
 ; CHECK-LABEL: xor_v8i16_y:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q2, #0x0
 ; CHECK-NEXT:    vctp.16 r0
-; CHECK-NEXT:    vpsel q0, q0, q2
-; CHECK-NEXT:    veor q0, q0, q1
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    veort q1, q1, q0
+; CHECK-NEXT:    vmov q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
@@ -1582,10 +1537,10 @@
 define arm_aapcs_vfpcc <16 x i8> @xor_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
 ; CHECK-LABEL: xor_v16i8_y:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q2, #0x0
 ; CHECK-NEXT:    vctp.8 r0
-; CHECK-NEXT:    vpsel q0, q0, q2
-; CHECK-NEXT:    veor q0, q0, q1
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    veort q1, q1, q0
+; CHECK-NEXT:    vmov q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
@@ -2219,10 +2174,11 @@
 define arm_aapcs_vfpcc <4 x i32> @addqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) {
 ; CHECK-LABEL: addqr_v4i32_y:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q1, #0x0
+; CHECK-NEXT:    vdup.32 q1, r0
 ; CHECK-NEXT:    vctp.32 r1
-; CHECK-NEXT:    vpsel q0, q0, q1
-; CHECK-NEXT:    vadd.i32 q0, q0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vaddt.i32 q1, q1, q0
+; CHECK-NEXT:    vmov q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
@@ -2236,10 +2192,11 @@
 define arm_aapcs_vfpcc <8 x i16> @addqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) {
 ; CHECK-LABEL: addqr_v8i16_y:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q1, #0x0
+; CHECK-NEXT:    vdup.16 q1, r0
 ; CHECK-NEXT:    vctp.16 r1
-; CHECK-NEXT:    vpsel q0, q0, q1
-; CHECK-NEXT:    vadd.i16 q0, q0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vaddt.i16 q1, q1, q0
+; CHECK-NEXT:    vmov q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
@@ -2253,10 +2210,11 @@
 define arm_aapcs_vfpcc <16 x i8> @addqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) {
 ; CHECK-LABEL: addqr_v16i8_y:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q1, #0x0
+; CHECK-NEXT:    vdup.8 q1, r0
 ; CHECK-NEXT:    vctp.8 r1
-; CHECK-NEXT:    vpsel q0, q0, q1
-; CHECK-NEXT:    vadd.i8 q0, q0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vaddt.i8 q1, q1, q0
+; CHECK-NEXT:    vmov q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
@@ -2324,10 +2282,11 @@
 define arm_aapcs_vfpcc <4 x i32> @mulqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) {
 ; CHECK-LABEL: mulqr_v4i32_y:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q1, #0x1
+; CHECK-NEXT:    vdup.32 q1, r0
 ; CHECK-NEXT:    vctp.32 r1
-; CHECK-NEXT:    vpsel q0, q0, q1
-; CHECK-NEXT:    vmul.i32 q0, q0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vmult.i32 q1, q1, q0
+; CHECK-NEXT:    vmov q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
@@ -2341,10 +2300,11 @@
 define arm_aapcs_vfpcc <8 x i16> @mulqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) {
 ; CHECK-LABEL: mulqr_v8i16_y:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i16 q1, #0x1
+; CHECK-NEXT:    vdup.16 q1, r0
 ; CHECK-NEXT:    vctp.16 r1
-; CHECK-NEXT:    vpsel q0, q0, q1
-; CHECK-NEXT:    vmul.i16 q0, q0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vmult.i16 q1, q1, q0
+; CHECK-NEXT:    vmov q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
@@ -2358,10 +2318,11 @@
 define arm_aapcs_vfpcc <16 x i8> @mulqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) {
 ; CHECK-LABEL: mulqr_v16i8_y:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i8 q1, #0x1
+; CHECK-NEXT:    vdup.8 q1, r0
 ; CHECK-NEXT:    vctp.8 r1
-; CHECK-NEXT:    vpsel q0, q0, q1
-; CHECK-NEXT:    vmul.i8 q0, q0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vmult.i8 q1, q1, q0
+; CHECK-NEXT:    vmov q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -3589,6 +3589,9 @@ static bool hasNullFragReference(DagInit *DI) {
   if (Operator->getName() == "null_frag") return true;
   // If any of the arguments reference the null fragment, return true.
   for (unsigned i = 0, e = DI->getNumArgs(); i != e; ++i) {
+    if (auto Arg = dyn_cast<DefInit>(DI->getArg(i)))
+      if (Arg->getDef()->getName() == "null_frag")
+        return true;
     DagInit *Arg = dyn_cast<DagInit>(DI->getArg(i));
     if (Arg && hasNullFragReference(Arg))
       return true;