forked from OSchip/llvm-project
[ARM] Create VMOVRRD from adjacent vector extracts
This adds a combine for extract(x, n); extract(x, n+1) -> VMOVRRD(extract x, n/2). This allows two vector lanes to be moved at the same time in a single instruction and, thanks to the other VMOVRRD folds we have added recently, can help reduce the number of executed instructions. Floating point types are very similar, but will include a bitcast to an integer type. This also adds a shouldRewriteCopySrc, to prevent copy propagation from DPR to SPR, which can break because not every DPR register can have an SPR extracted from it directly; without this the machine verifier is unhappy. Differential Revision: https://reviews.llvm.org/D100244
This commit is contained in:
parent
6d0fef4860
commit
48cef1fa8e
|
@ -909,3 +909,17 @@ bool ARMBaseRegisterInfo::shouldCoalesce(MachineInstr *MI,
|
|||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool ARMBaseRegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
|
||||
unsigned DefSubReg,
|
||||
const TargetRegisterClass *SrcRC,
|
||||
unsigned SrcSubReg) const {
|
||||
// We can't extract an SPR from an arbitary DPR (as opposed to a DPR_VFP2).
|
||||
if (DefRC == &ARM::SPRRegClass && DefSubReg == 0 &&
|
||||
SrcRC == &ARM::DPRRegClass &&
|
||||
(SrcSubReg == ARM::ssub_0 || SrcSubReg == ARM::ssub_1))
|
||||
return false;
|
||||
|
||||
return TargetRegisterInfo::shouldRewriteCopySrc(DefRC, DefSubReg,
|
||||
SrcRC, SrcSubReg);
|
||||
}
|
|
@ -209,6 +209,11 @@ public:
|
|||
unsigned DstSubReg,
|
||||
const TargetRegisterClass *NewRC,
|
||||
LiveIntervals &LIS) const override;
|
||||
|
||||
bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
|
||||
unsigned DefSubReg,
|
||||
const TargetRegisterClass *SrcRC,
|
||||
unsigned SrcSubReg) const;
|
||||
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
|
|
@ -14081,6 +14081,69 @@ static SDValue PerformInsertEltCombine(SDNode *N,
|
|||
return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
|
||||
}
|
||||
|
||||
// Convert a pair of extracts from the same base vector to a VMOVRRD. Either
|
||||
// directly or bitcast to an integer if the original is a float vector.
|
||||
// extract(x, n); extract(x, n+1) -> VMOVRRD(extract v2f64 x, n/2)
|
||||
// bitcast(extract(x, n)); bitcast(extract(x, n+1)) -> VMOVRRD(extract x, n/2)
|
||||
static SDValue
|
||||
PerformExtractEltToVMOVRRD(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
|
||||
EVT VT = N->getValueType(0);
|
||||
SDLoc dl(N);
|
||||
|
||||
if (!DCI.isAfterLegalizeDAG() || VT != MVT::i32)
|
||||
return SDValue();
|
||||
|
||||
SDValue Ext = SDValue(N, 0);
|
||||
if (Ext.getOpcode() == ISD::BITCAST &&
|
||||
Ext.getOperand(0).getValueType() == MVT::f32)
|
||||
Ext = Ext.getOperand(0);
|
||||
if (Ext.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
|
||||
!isa<ConstantSDNode>(Ext.getOperand(1)) ||
|
||||
Ext.getConstantOperandVal(1) % 2 != 0)
|
||||
return SDValue();
|
||||
if (Ext->use_size() == 1 &&
|
||||
(Ext->use_begin()->getOpcode() == ISD::SINT_TO_FP ||
|
||||
Ext->use_begin()->getOpcode() == ISD::UINT_TO_FP))
|
||||
return SDValue();
|
||||
|
||||
SDValue Op0 = Ext.getOperand(0);
|
||||
EVT VecVT = Op0.getValueType();
|
||||
unsigned Lane = Ext.getConstantOperandVal(1);
|
||||
if (VecVT.getVectorNumElements() != 4)
|
||||
return SDValue();
|
||||
|
||||
// Find another extract, of Lane + 1
|
||||
auto OtherIt = find_if(Op0->uses(), [&](SDNode *V) {
|
||||
return V->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
|
||||
isa<ConstantSDNode>(V->getOperand(1)) &&
|
||||
V->getConstantOperandVal(1) == Lane + 1;
|
||||
});
|
||||
if (OtherIt == Op0->uses().end())
|
||||
return SDValue();
|
||||
|
||||
// For float extracts, we need to be converting to a i32 for both vector
|
||||
// lanes.
|
||||
SDValue OtherExt(*OtherIt, 0);
|
||||
if (OtherExt.getValueType() != MVT::i32) {
|
||||
if (OtherExt->use_size() != 1 ||
|
||||
OtherExt->use_begin()->getOpcode() != ISD::BITCAST ||
|
||||
OtherExt->use_begin()->getValueType(0) != MVT::i32)
|
||||
return SDValue();
|
||||
OtherExt = SDValue(*OtherExt->use_begin(), 0);
|
||||
}
|
||||
|
||||
// Convert the type to a f64 and extract with a VMOVRRD.
|
||||
SDValue F64 = DCI.DAG.getNode(
|
||||
ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
|
||||
DCI.DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v2f64, Op0),
|
||||
DCI.DAG.getConstant(Ext.getConstantOperandVal(1) / 2, dl, MVT::i32));
|
||||
SDValue VMOVRRD =
|
||||
DCI.DAG.getNode(ARMISD::VMOVRRD, dl, {MVT::i32, MVT::i32}, F64);
|
||||
|
||||
DCI.CombineTo(OtherExt.getNode(), SDValue(VMOVRRD.getNode(), 1));
|
||||
return VMOVRRD;
|
||||
}
|
||||
|
||||
static SDValue PerformExtractEltCombine(SDNode *N,
|
||||
TargetLowering::DAGCombinerInfo &DCI,
|
||||
const ARMSubtarget *ST) {
|
||||
|
@ -14115,6 +14178,10 @@ static SDValue PerformExtractEltCombine(SDNode *N,
|
|||
return MOV.getOperand(ST->isLittle() ? Offset % 2 : 1 - Offset % 2);
|
||||
}
|
||||
|
||||
// extract x, n; extract x, n+1 -> VMOVRRD x
|
||||
if (SDValue R = PerformExtractEltToVMOVRRD(N, DCI))
|
||||
return R;
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
|
@ -16535,8 +16602,10 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
|
|||
return Res;
|
||||
}
|
||||
|
||||
static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG,
|
||||
const ARMSubtarget *ST) {
|
||||
static SDValue PerformBITCASTCombine(SDNode *N,
|
||||
TargetLowering::DAGCombinerInfo &DCI,
|
||||
const ARMSubtarget *ST) {
|
||||
SelectionDAG &DAG = DCI.DAG;
|
||||
SDValue Src = N->getOperand(0);
|
||||
EVT DstVT = N->getValueType(0);
|
||||
|
||||
|
@ -16562,6 +16631,10 @@ static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG,
|
|||
DAG.getDataLayout().isBigEndian())
|
||||
return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(N), DstVT, Src);
|
||||
|
||||
// bitcast(extract(x, n)); bitcast(extract(x, n+1)) -> VMOVRRD x
|
||||
if (SDValue R = PerformExtractEltToVMOVRRD(N, DCI))
|
||||
return R;
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
|
@ -16633,7 +16706,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
|
|||
case ARMISD::BUILD_VECTOR:
|
||||
return PerformARMBUILD_VECTORCombine(N, DCI);
|
||||
case ISD::BITCAST:
|
||||
return PerformBITCASTCombine(N, DCI.DAG, Subtarget);
|
||||
return PerformBITCASTCombine(N, DCI, Subtarget);
|
||||
case ARMISD::PREDICATE_CAST:
|
||||
return PerformPREDICATE_CASTCombine(N, DCI);
|
||||
case ARMISD::VECTOR_REG_CAST:
|
||||
|
|
|
@ -12,26 +12,22 @@ define <2 x i1> @uaddo(<2 x i64> *%ptr, <2 x i64> *%ptr2) {
|
|||
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
|
||||
; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
|
||||
; CHECK-NEXT: movs r1, #0
|
||||
; CHECK-NEXT: vmov r3, r2, d18
|
||||
; CHECK-NEXT: vadd.i64 q8, q9, q8
|
||||
; CHECK-NEXT: vmov.32 r3, d18[0]
|
||||
; CHECK-NEXT: vmov.32 r2, d18[1]
|
||||
; CHECK-NEXT: vmov.32 r12, d16[0]
|
||||
; CHECK-NEXT: vmov.32 lr, d16[1]
|
||||
; CHECK-NEXT: vmov.32 r4, d17[0]
|
||||
; CHECK-NEXT: vmov.32 r5, d19[0]
|
||||
; CHECK-NEXT: vmov.32 r6, d17[1]
|
||||
; CHECK-NEXT: vmov.32 r7, d19[1]
|
||||
; CHECK-NEXT: subs.w r3, r12, r3
|
||||
; CHECK-NEXT: sbcs.w r2, lr, r2
|
||||
; CHECK-NEXT: movs r1, #0
|
||||
; CHECK-NEXT: vmov r6, r7, d19
|
||||
; CHECK-NEXT: vmov lr, r12, d16
|
||||
; CHECK-NEXT: vmov r4, r5, d17
|
||||
; CHECK-NEXT: subs.w r3, lr, r3
|
||||
; CHECK-NEXT: sbcs.w r2, r12, r2
|
||||
; CHECK-NEXT: mov.w r2, #0
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r2, #1
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: movne.w r2, #-1
|
||||
; CHECK-NEXT: subs r3, r4, r5
|
||||
; CHECK-NEXT: sbcs.w r3, r6, r7
|
||||
; CHECK-NEXT: subs r3, r4, r6
|
||||
; CHECK-NEXT: sbcs.w r3, r5, r7
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r1, #1
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
|
@ -57,24 +53,20 @@ define <2 x i1> @usubo(<2 x i64> *%ptr, <2 x i64> *%ptr2) {
|
|||
; CHECK-NEXT: movs r1, #0
|
||||
; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
|
||||
; CHECK-NEXT: vsub.i64 q8, q9, q8
|
||||
; CHECK-NEXT: vmov.32 r12, d18[0]
|
||||
; CHECK-NEXT: vmov.32 lr, d18[1]
|
||||
; CHECK-NEXT: vmov.32 r3, d16[0]
|
||||
; CHECK-NEXT: vmov.32 r2, d16[1]
|
||||
; CHECK-NEXT: vmov.32 r4, d19[0]
|
||||
; CHECK-NEXT: vmov.32 r5, d17[0]
|
||||
; CHECK-NEXT: vmov.32 r6, d19[1]
|
||||
; CHECK-NEXT: vmov.32 r7, d17[1]
|
||||
; CHECK-NEXT: subs.w r3, r12, r3
|
||||
; CHECK-NEXT: sbcs.w r2, lr, r2
|
||||
; CHECK-NEXT: vmov lr, r12, d18
|
||||
; CHECK-NEXT: vmov r4, r5, d19
|
||||
; CHECK-NEXT: vmov r3, r2, d16
|
||||
; CHECK-NEXT: vmov r6, r7, d17
|
||||
; CHECK-NEXT: subs.w r3, lr, r3
|
||||
; CHECK-NEXT: sbcs.w r2, r12, r2
|
||||
; CHECK-NEXT: mov.w r2, #0
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r2, #1
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: movne.w r2, #-1
|
||||
; CHECK-NEXT: subs r3, r4, r5
|
||||
; CHECK-NEXT: sbcs.w r3, r6, r7
|
||||
; CHECK-NEXT: subs r3, r4, r6
|
||||
; CHECK-NEXT: sbcs.w r3, r5, r7
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r1, #1
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
|
|
|
@ -492,24 +492,23 @@ entry:
|
|||
define void @conv_v8f16_to_i128( <8 x half> %a, i128* %store ) {
|
||||
; CHECK-LABEL: conv_v8f16_to_i128:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r11, lr}
|
||||
; CHECK-NEXT: push {r11, lr}
|
||||
; CHECK-NEXT: adr r1, .LCPI18_0
|
||||
; CHECK-NEXT: vrev64.16 q9, q0
|
||||
; CHECK-NEXT: vld1.64 {d16, d17}, [r1:128]
|
||||
; CHECK-NEXT: vrev64.16 q8, q8
|
||||
; CHECK-NEXT: vadd.f16 q8, q9, q8
|
||||
; CHECK-NEXT: vrev32.16 q8, q8
|
||||
; CHECK-NEXT: vmov.32 r12, d17[1]
|
||||
; CHECK-NEXT: vmov.32 r2, d17[0]
|
||||
; CHECK-NEXT: vmov.32 r3, d16[1]
|
||||
; CHECK-NEXT: vmov.32 r1, d16[0]
|
||||
; CHECK-NEXT: subs r12, r12, #1
|
||||
; CHECK-NEXT: str r12, [r0, #12]
|
||||
; CHECK-NEXT: sbcs r2, r2, #0
|
||||
; CHECK-NEXT: str r2, [r0, #8]
|
||||
; CHECK-NEXT: sbcs r3, r3, #0
|
||||
; CHECK-NEXT: sbc r1, r1, #0
|
||||
; CHECK-NEXT: stm r0, {r1, r3}
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: vmov r12, r2, d17
|
||||
; CHECK-NEXT: vmov r3, r1, d16
|
||||
; CHECK-NEXT: subs lr, r2, #1
|
||||
; CHECK-NEXT: sbcs r2, r12, #0
|
||||
; CHECK-NEXT: sbcs r1, r1, #0
|
||||
; CHECK-NEXT: sbc r3, r3, #0
|
||||
; CHECK-NEXT: str r3, [r0]
|
||||
; CHECK-NEXT: stmib r0, {r1, r2, lr}
|
||||
; CHECK-NEXT: pop {r11, pc}
|
||||
; CHECK-NEXT: .p2align 4
|
||||
; CHECK-NEXT: @ %bb.1:
|
||||
; CHECK-NEXT: .LCPI18_0:
|
||||
|
|
|
@ -1050,21 +1050,13 @@ define fp128 @test_f128_v2f64(<2 x double> %p) {
|
|||
; SOFT-NEXT: .pad #16
|
||||
; SOFT-NEXT: sub sp, sp, #16
|
||||
; SOFT-NEXT: vmov d16, r3, r2
|
||||
; SOFT-NEXT: add r12, sp, #12
|
||||
; SOFT-NEXT: vmov d17, r1, r0
|
||||
; SOFT-NEXT: vadd.f64 d19, d16, d16
|
||||
; SOFT-NEXT: vadd.f64 d18, d17, d17
|
||||
; SOFT-NEXT: vrev64.32 q8, q9
|
||||
; SOFT-NEXT: vmov.32 r0, d16[0]
|
||||
; SOFT-NEXT: vst1.32 {d17[1]}, [r12:32]
|
||||
; SOFT-NEXT: add r12, sp, #8
|
||||
; SOFT-NEXT: vst1.32 {d16[0]}, [sp:32]
|
||||
; SOFT-NEXT: vst1.32 {d17[0]}, [r12:32]
|
||||
; SOFT-NEXT: add r12, sp, #4
|
||||
; SOFT-NEXT: vst1.32 {d16[1]}, [r12:32]
|
||||
; SOFT-NEXT: vmov.32 r1, d16[1]
|
||||
; SOFT-NEXT: vmov.32 r2, d17[0]
|
||||
; SOFT-NEXT: vmov.32 r3, d17[1]
|
||||
; SOFT-NEXT: vmov r2, r3, d17
|
||||
; SOFT-NEXT: vmov r0, r1, d16
|
||||
; SOFT-NEXT: stm sp, {r0, r1, r2, r3}
|
||||
; SOFT-NEXT: bl __addtf3
|
||||
; SOFT-NEXT: add sp, sp, #16
|
||||
; SOFT-NEXT: pop {r11, pc}
|
||||
|
@ -1076,19 +1068,11 @@ define fp128 @test_f128_v2f64(<2 x double> %p) {
|
|||
; HARD-NEXT: .pad #16
|
||||
; HARD-NEXT: sub sp, sp, #16
|
||||
; HARD-NEXT: vadd.f64 d17, d1, d1
|
||||
; HARD-NEXT: add r12, sp, #12
|
||||
; HARD-NEXT: vadd.f64 d16, d0, d0
|
||||
; HARD-NEXT: vrev64.32 q8, q8
|
||||
; HARD-NEXT: vmov.32 r0, d16[0]
|
||||
; HARD-NEXT: vst1.32 {d17[1]}, [r12:32]
|
||||
; HARD-NEXT: add r12, sp, #8
|
||||
; HARD-NEXT: vst1.32 {d16[0]}, [sp:32]
|
||||
; HARD-NEXT: vst1.32 {d17[0]}, [r12:32]
|
||||
; HARD-NEXT: add r12, sp, #4
|
||||
; HARD-NEXT: vst1.32 {d16[1]}, [r12:32]
|
||||
; HARD-NEXT: vmov.32 r1, d16[1]
|
||||
; HARD-NEXT: vmov.32 r2, d17[0]
|
||||
; HARD-NEXT: vmov.32 r3, d17[1]
|
||||
; HARD-NEXT: vmov r2, r3, d17
|
||||
; HARD-NEXT: vmov r0, r1, d16
|
||||
; HARD-NEXT: stm sp, {r0, r1, r2, r3}
|
||||
; HARD-NEXT: bl __addtf3
|
||||
; HARD-NEXT: add sp, sp, #16
|
||||
; HARD-NEXT: pop {r11, pc}
|
||||
|
@ -1106,20 +1090,12 @@ define fp128 @test_f128_v2i64(<2 x i64> %p) {
|
|||
; SOFT-NEXT: .pad #16
|
||||
; SOFT-NEXT: sub sp, sp, #16
|
||||
; SOFT-NEXT: vmov d17, r3, r2
|
||||
; SOFT-NEXT: add r12, sp, #12
|
||||
; SOFT-NEXT: vmov d16, r1, r0
|
||||
; SOFT-NEXT: vadd.i64 q8, q8, q8
|
||||
; SOFT-NEXT: vrev64.32 q8, q8
|
||||
; SOFT-NEXT: vmov.32 r0, d16[0]
|
||||
; SOFT-NEXT: vst1.32 {d17[1]}, [r12:32]
|
||||
; SOFT-NEXT: add r12, sp, #8
|
||||
; SOFT-NEXT: vst1.32 {d16[0]}, [sp:32]
|
||||
; SOFT-NEXT: vst1.32 {d17[0]}, [r12:32]
|
||||
; SOFT-NEXT: add r12, sp, #4
|
||||
; SOFT-NEXT: vst1.32 {d16[1]}, [r12:32]
|
||||
; SOFT-NEXT: vmov.32 r1, d16[1]
|
||||
; SOFT-NEXT: vmov.32 r2, d17[0]
|
||||
; SOFT-NEXT: vmov.32 r3, d17[1]
|
||||
; SOFT-NEXT: vmov r2, r3, d17
|
||||
; SOFT-NEXT: vmov r0, r1, d16
|
||||
; SOFT-NEXT: stm sp, {r0, r1, r2, r3}
|
||||
; SOFT-NEXT: bl __addtf3
|
||||
; SOFT-NEXT: add sp, sp, #16
|
||||
; SOFT-NEXT: pop {r11, pc}
|
||||
|
@ -1131,18 +1107,10 @@ define fp128 @test_f128_v2i64(<2 x i64> %p) {
|
|||
; HARD-NEXT: .pad #16
|
||||
; HARD-NEXT: sub sp, sp, #16
|
||||
; HARD-NEXT: vadd.i64 q8, q0, q0
|
||||
; HARD-NEXT: add r12, sp, #12
|
||||
; HARD-NEXT: vrev64.32 q8, q8
|
||||
; HARD-NEXT: vmov.32 r0, d16[0]
|
||||
; HARD-NEXT: vst1.32 {d17[1]}, [r12:32]
|
||||
; HARD-NEXT: add r12, sp, #8
|
||||
; HARD-NEXT: vst1.32 {d16[0]}, [sp:32]
|
||||
; HARD-NEXT: vst1.32 {d17[0]}, [r12:32]
|
||||
; HARD-NEXT: add r12, sp, #4
|
||||
; HARD-NEXT: vst1.32 {d16[1]}, [r12:32]
|
||||
; HARD-NEXT: vmov.32 r1, d16[1]
|
||||
; HARD-NEXT: vmov.32 r2, d17[0]
|
||||
; HARD-NEXT: vmov.32 r3, d17[1]
|
||||
; HARD-NEXT: vmov r2, r3, d17
|
||||
; HARD-NEXT: vmov r0, r1, d16
|
||||
; HARD-NEXT: stm sp, {r0, r1, r2, r3}
|
||||
; HARD-NEXT: bl __addtf3
|
||||
; HARD-NEXT: add sp, sp, #16
|
||||
; HARD-NEXT: pop {r11, pc}
|
||||
|
@ -1160,20 +1128,12 @@ define fp128 @test_f128_v4f32(<4 x float> %p) {
|
|||
; SOFT-NEXT: .pad #16
|
||||
; SOFT-NEXT: sub sp, sp, #16
|
||||
; SOFT-NEXT: vmov d17, r3, r2
|
||||
; SOFT-NEXT: add r12, sp, #12
|
||||
; SOFT-NEXT: vmov d16, r1, r0
|
||||
; SOFT-NEXT: vrev64.32 q8, q8
|
||||
; SOFT-NEXT: vadd.f32 q8, q8, q8
|
||||
; SOFT-NEXT: vmov.32 r0, d16[0]
|
||||
; SOFT-NEXT: vst1.32 {d17[1]}, [r12:32]
|
||||
; SOFT-NEXT: add r12, sp, #8
|
||||
; SOFT-NEXT: vst1.32 {d16[0]}, [sp:32]
|
||||
; SOFT-NEXT: vst1.32 {d17[0]}, [r12:32]
|
||||
; SOFT-NEXT: add r12, sp, #4
|
||||
; SOFT-NEXT: vst1.32 {d16[1]}, [r12:32]
|
||||
; SOFT-NEXT: vmov.32 r1, d16[1]
|
||||
; SOFT-NEXT: vmov.32 r2, d17[0]
|
||||
; SOFT-NEXT: vmov.32 r3, d17[1]
|
||||
; SOFT-NEXT: vmov r2, r3, d17
|
||||
; SOFT-NEXT: vmov r0, r1, d16
|
||||
; SOFT-NEXT: stm sp, {r0, r1, r2, r3}
|
||||
; SOFT-NEXT: bl __addtf3
|
||||
; SOFT-NEXT: add sp, sp, #16
|
||||
; SOFT-NEXT: pop {r11, pc}
|
||||
|
@ -1185,18 +1145,10 @@ define fp128 @test_f128_v4f32(<4 x float> %p) {
|
|||
; HARD-NEXT: .pad #16
|
||||
; HARD-NEXT: sub sp, sp, #16
|
||||
; HARD-NEXT: vrev64.32 q8, q0
|
||||
; HARD-NEXT: add r12, sp, #12
|
||||
; HARD-NEXT: vadd.f32 q8, q8, q8
|
||||
; HARD-NEXT: vmov.32 r0, d16[0]
|
||||
; HARD-NEXT: vst1.32 {d17[1]}, [r12:32]
|
||||
; HARD-NEXT: add r12, sp, #8
|
||||
; HARD-NEXT: vst1.32 {d16[0]}, [sp:32]
|
||||
; HARD-NEXT: vst1.32 {d17[0]}, [r12:32]
|
||||
; HARD-NEXT: add r12, sp, #4
|
||||
; HARD-NEXT: vst1.32 {d16[1]}, [r12:32]
|
||||
; HARD-NEXT: vmov.32 r1, d16[1]
|
||||
; HARD-NEXT: vmov.32 r2, d17[0]
|
||||
; HARD-NEXT: vmov.32 r3, d17[1]
|
||||
; HARD-NEXT: vmov r2, r3, d17
|
||||
; HARD-NEXT: vmov r0, r1, d16
|
||||
; HARD-NEXT: stm sp, {r0, r1, r2, r3}
|
||||
; HARD-NEXT: bl __addtf3
|
||||
; HARD-NEXT: add sp, sp, #16
|
||||
; HARD-NEXT: pop {r11, pc}
|
||||
|
@ -1214,20 +1166,12 @@ define fp128 @test_f128_v4i32(<4 x i32> %p) {
|
|||
; SOFT-NEXT: .pad #16
|
||||
; SOFT-NEXT: sub sp, sp, #16
|
||||
; SOFT-NEXT: vmov d17, r3, r2
|
||||
; SOFT-NEXT: add r12, sp, #12
|
||||
; SOFT-NEXT: vmov d16, r1, r0
|
||||
; SOFT-NEXT: vrev64.32 q8, q8
|
||||
; SOFT-NEXT: vadd.i32 q8, q8, q8
|
||||
; SOFT-NEXT: vmov.32 r0, d16[0]
|
||||
; SOFT-NEXT: vst1.32 {d17[1]}, [r12:32]
|
||||
; SOFT-NEXT: add r12, sp, #8
|
||||
; SOFT-NEXT: vst1.32 {d16[0]}, [sp:32]
|
||||
; SOFT-NEXT: vst1.32 {d17[0]}, [r12:32]
|
||||
; SOFT-NEXT: add r12, sp, #4
|
||||
; SOFT-NEXT: vst1.32 {d16[1]}, [r12:32]
|
||||
; SOFT-NEXT: vmov.32 r1, d16[1]
|
||||
; SOFT-NEXT: vmov.32 r2, d17[0]
|
||||
; SOFT-NEXT: vmov.32 r3, d17[1]
|
||||
; SOFT-NEXT: vmov r2, r3, d17
|
||||
; SOFT-NEXT: vmov r0, r1, d16
|
||||
; SOFT-NEXT: stm sp, {r0, r1, r2, r3}
|
||||
; SOFT-NEXT: bl __addtf3
|
||||
; SOFT-NEXT: add sp, sp, #16
|
||||
; SOFT-NEXT: pop {r11, pc}
|
||||
|
@ -1239,18 +1183,10 @@ define fp128 @test_f128_v4i32(<4 x i32> %p) {
|
|||
; HARD-NEXT: .pad #16
|
||||
; HARD-NEXT: sub sp, sp, #16
|
||||
; HARD-NEXT: vrev64.32 q8, q0
|
||||
; HARD-NEXT: add r12, sp, #12
|
||||
; HARD-NEXT: vadd.i32 q8, q8, q8
|
||||
; HARD-NEXT: vmov.32 r0, d16[0]
|
||||
; HARD-NEXT: vst1.32 {d17[1]}, [r12:32]
|
||||
; HARD-NEXT: add r12, sp, #8
|
||||
; HARD-NEXT: vst1.32 {d16[0]}, [sp:32]
|
||||
; HARD-NEXT: vst1.32 {d17[0]}, [r12:32]
|
||||
; HARD-NEXT: add r12, sp, #4
|
||||
; HARD-NEXT: vst1.32 {d16[1]}, [r12:32]
|
||||
; HARD-NEXT: vmov.32 r1, d16[1]
|
||||
; HARD-NEXT: vmov.32 r2, d17[0]
|
||||
; HARD-NEXT: vmov.32 r3, d17[1]
|
||||
; HARD-NEXT: vmov r2, r3, d17
|
||||
; HARD-NEXT: vmov r0, r1, d16
|
||||
; HARD-NEXT: stm sp, {r0, r1, r2, r3}
|
||||
; HARD-NEXT: bl __addtf3
|
||||
; HARD-NEXT: add sp, sp, #16
|
||||
; HARD-NEXT: pop {r11, pc}
|
||||
|
@ -1268,21 +1204,13 @@ define fp128 @test_f128_v8i16(<8 x i16> %p) {
|
|||
; SOFT-NEXT: .pad #16
|
||||
; SOFT-NEXT: sub sp, sp, #16
|
||||
; SOFT-NEXT: vmov d17, r3, r2
|
||||
; SOFT-NEXT: add r12, sp, #12
|
||||
; SOFT-NEXT: vmov d16, r1, r0
|
||||
; SOFT-NEXT: vrev64.16 q8, q8
|
||||
; SOFT-NEXT: vadd.i16 q8, q8, q8
|
||||
; SOFT-NEXT: vrev32.16 q8, q8
|
||||
; SOFT-NEXT: vmov.32 r0, d16[0]
|
||||
; SOFT-NEXT: vst1.32 {d17[1]}, [r12:32]
|
||||
; SOFT-NEXT: add r12, sp, #8
|
||||
; SOFT-NEXT: vst1.32 {d16[0]}, [sp:32]
|
||||
; SOFT-NEXT: vst1.32 {d17[0]}, [r12:32]
|
||||
; SOFT-NEXT: add r12, sp, #4
|
||||
; SOFT-NEXT: vst1.32 {d16[1]}, [r12:32]
|
||||
; SOFT-NEXT: vmov.32 r1, d16[1]
|
||||
; SOFT-NEXT: vmov.32 r2, d17[0]
|
||||
; SOFT-NEXT: vmov.32 r3, d17[1]
|
||||
; SOFT-NEXT: vmov r2, r3, d17
|
||||
; SOFT-NEXT: vmov r0, r1, d16
|
||||
; SOFT-NEXT: stm sp, {r0, r1, r2, r3}
|
||||
; SOFT-NEXT: bl __addtf3
|
||||
; SOFT-NEXT: add sp, sp, #16
|
||||
; SOFT-NEXT: pop {r11, pc}
|
||||
|
@ -1294,19 +1222,11 @@ define fp128 @test_f128_v8i16(<8 x i16> %p) {
|
|||
; HARD-NEXT: .pad #16
|
||||
; HARD-NEXT: sub sp, sp, #16
|
||||
; HARD-NEXT: vrev64.16 q8, q0
|
||||
; HARD-NEXT: add r12, sp, #12
|
||||
; HARD-NEXT: vadd.i16 q8, q8, q8
|
||||
; HARD-NEXT: vrev32.16 q8, q8
|
||||
; HARD-NEXT: vmov.32 r0, d16[0]
|
||||
; HARD-NEXT: vst1.32 {d17[1]}, [r12:32]
|
||||
; HARD-NEXT: add r12, sp, #8
|
||||
; HARD-NEXT: vst1.32 {d16[0]}, [sp:32]
|
||||
; HARD-NEXT: vst1.32 {d17[0]}, [r12:32]
|
||||
; HARD-NEXT: add r12, sp, #4
|
||||
; HARD-NEXT: vst1.32 {d16[1]}, [r12:32]
|
||||
; HARD-NEXT: vmov.32 r1, d16[1]
|
||||
; HARD-NEXT: vmov.32 r2, d17[0]
|
||||
; HARD-NEXT: vmov.32 r3, d17[1]
|
||||
; HARD-NEXT: vmov r2, r3, d17
|
||||
; HARD-NEXT: vmov r0, r1, d16
|
||||
; HARD-NEXT: stm sp, {r0, r1, r2, r3}
|
||||
; HARD-NEXT: bl __addtf3
|
||||
; HARD-NEXT: add sp, sp, #16
|
||||
; HARD-NEXT: pop {r11, pc}
|
||||
|
@ -1324,21 +1244,13 @@ define fp128 @test_f128_v16i8(<16 x i8> %p) {
|
|||
; SOFT-NEXT: .pad #16
|
||||
; SOFT-NEXT: sub sp, sp, #16
|
||||
; SOFT-NEXT: vmov d17, r3, r2
|
||||
; SOFT-NEXT: add r12, sp, #12
|
||||
; SOFT-NEXT: vmov d16, r1, r0
|
||||
; SOFT-NEXT: vrev64.8 q8, q8
|
||||
; SOFT-NEXT: vadd.i8 q8, q8, q8
|
||||
; SOFT-NEXT: vrev32.8 q8, q8
|
||||
; SOFT-NEXT: vmov.32 r0, d16[0]
|
||||
; SOFT-NEXT: vst1.32 {d17[1]}, [r12:32]
|
||||
; SOFT-NEXT: add r12, sp, #8
|
||||
; SOFT-NEXT: vst1.32 {d16[0]}, [sp:32]
|
||||
; SOFT-NEXT: vst1.32 {d17[0]}, [r12:32]
|
||||
; SOFT-NEXT: add r12, sp, #4
|
||||
; SOFT-NEXT: vst1.32 {d16[1]}, [r12:32]
|
||||
; SOFT-NEXT: vmov.32 r1, d16[1]
|
||||
; SOFT-NEXT: vmov.32 r2, d17[0]
|
||||
; SOFT-NEXT: vmov.32 r3, d17[1]
|
||||
; SOFT-NEXT: vmov r2, r3, d17
|
||||
; SOFT-NEXT: vmov r0, r1, d16
|
||||
; SOFT-NEXT: stm sp, {r0, r1, r2, r3}
|
||||
; SOFT-NEXT: bl __addtf3
|
||||
; SOFT-NEXT: add sp, sp, #16
|
||||
; SOFT-NEXT: pop {r11, pc}
|
||||
|
@ -1350,19 +1262,11 @@ define fp128 @test_f128_v16i8(<16 x i8> %p) {
|
|||
; HARD-NEXT: .pad #16
|
||||
; HARD-NEXT: sub sp, sp, #16
|
||||
; HARD-NEXT: vrev64.8 q8, q0
|
||||
; HARD-NEXT: add r12, sp, #12
|
||||
; HARD-NEXT: vadd.i8 q8, q8, q8
|
||||
; HARD-NEXT: vrev32.8 q8, q8
|
||||
; HARD-NEXT: vmov.32 r0, d16[0]
|
||||
; HARD-NEXT: vst1.32 {d17[1]}, [r12:32]
|
||||
; HARD-NEXT: add r12, sp, #8
|
||||
; HARD-NEXT: vst1.32 {d16[0]}, [sp:32]
|
||||
; HARD-NEXT: vst1.32 {d17[0]}, [r12:32]
|
||||
; HARD-NEXT: add r12, sp, #4
|
||||
; HARD-NEXT: vst1.32 {d16[1]}, [r12:32]
|
||||
; HARD-NEXT: vmov.32 r1, d16[1]
|
||||
; HARD-NEXT: vmov.32 r2, d17[0]
|
||||
; HARD-NEXT: vmov.32 r3, d17[1]
|
||||
; HARD-NEXT: vmov r2, r3, d17
|
||||
; HARD-NEXT: vmov r0, r1, d16
|
||||
; HARD-NEXT: stm sp, {r0, r1, r2, r3}
|
||||
; HARD-NEXT: bl __addtf3
|
||||
; HARD-NEXT: add sp, sp, #16
|
||||
; HARD-NEXT: pop {r11, pc}
|
||||
|
|
|
@ -75,10 +75,8 @@ define i64 @severalUses(<2 x i64>* %addr, <8 x i8>* %addr2) {
|
|||
; CHECK-LABEL: severalUses:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
|
||||
; CHECK-NEXT: vmov.32 r2, d16[1]
|
||||
; CHECK-NEXT: vmov.32 r0, d16[0]
|
||||
; CHECK-NEXT: vmov r0, r2, d16
|
||||
; CHECK-NEXT: vldr d18, [r1]
|
||||
; CHECK-NEXT: vmov d16, r0, r2
|
||||
; CHECK-NEXT: vtbl.8 d16, {d16, d17}, d18
|
||||
; CHECK-NEXT: vstr d16, [r1]
|
||||
; CHECK-NEXT: mov r1, r2
|
||||
|
|
|
@ -72,55 +72,47 @@ define void @func_blend18(%T0_18* %loadaddr, %T0_18* %loadaddr2,
|
|||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
|
||||
; CHECK-NEXT: vld1.64 {d22, d23}, [r0:128]!
|
||||
; CHECK-NEXT: vld1.64 {d16, d17}, [r1:128]!
|
||||
; CHECK-NEXT: vld1.64 {d22, d23}, [r0:128]!
|
||||
; CHECK-NEXT: vmov r4, r6, d16
|
||||
; CHECK-NEXT: vld1.64 {d18, d19}, [r1:128]
|
||||
; CHECK-NEXT: mov r1, #0
|
||||
; CHECK-NEXT: vld1.64 {d20, d21}, [r0:128]
|
||||
; CHECK-NEXT: vmov.32 r12, d18[0]
|
||||
; CHECK-NEXT: vmov.32 r2, d20[0]
|
||||
; CHECK-NEXT: vmov.32 lr, d18[1]
|
||||
; CHECK-NEXT: vmov.32 r0, d20[1]
|
||||
; CHECK-NEXT: vmov.32 r7, d16[0]
|
||||
; CHECK-NEXT: vmov.32 r5, d22[0]
|
||||
; CHECK-NEXT: vmov.32 r4, d22[1]
|
||||
; CHECK-NEXT: vmov.32 r6, d19[0]
|
||||
; CHECK-NEXT: subs r2, r2, r12
|
||||
; CHECK-NEXT: vmov.32 r2, d16[1]
|
||||
; CHECK-NEXT: sbcs r0, r0, lr
|
||||
; CHECK-NEXT: vmov lr, r12, d18
|
||||
; CHECK-NEXT: mov r0, #0
|
||||
; CHECK-NEXT: movlt r0, #1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: mvnne r0, #0
|
||||
; CHECK-NEXT: subs r7, r5, r7
|
||||
; CHECK-NEXT: vmov.32 r7, d21[0]
|
||||
; CHECK-NEXT: vmov.32 r5, d19[1]
|
||||
; CHECK-NEXT: sbcs r2, r4, r2
|
||||
; CHECK-NEXT: vmov.32 r4, d21[1]
|
||||
; CHECK-NEXT: mov r2, #0
|
||||
; CHECK-NEXT: movlt r2, #1
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: mvnne r2, #0
|
||||
; CHECK-NEXT: subs r7, r7, r6
|
||||
; CHECK-NEXT: vmov.32 r6, d23[0]
|
||||
; CHECK-NEXT: vmov.32 r7, d17[0]
|
||||
; CHECK-NEXT: sbcs r5, r4, r5
|
||||
; CHECK-NEXT: mov r4, #0
|
||||
; CHECK-NEXT: movlt r4, #1
|
||||
; CHECK-NEXT: vmov.32 r5, d17[1]
|
||||
; CHECK-NEXT: subs r7, r6, r7
|
||||
; CHECK-NEXT: vmov.32 r7, d23[1]
|
||||
; CHECK-NEXT: sbcs r7, r7, r5
|
||||
; CHECK-NEXT: vmov r2, r1, d20
|
||||
; CHECK-NEXT: subs r2, r2, lr
|
||||
; CHECK-NEXT: vmov r7, lr, d17
|
||||
; CHECK-NEXT: vmov r2, r5, d22
|
||||
; CHECK-NEXT: sbcs r1, r1, r12
|
||||
; CHECK-NEXT: mov r1, #0
|
||||
; CHECK-NEXT: movlt r1, #1
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: mvnne r1, #0
|
||||
; CHECK-NEXT: cmp r4, #0
|
||||
; CHECK-NEXT: vdup.32 d25, r1
|
||||
; CHECK-NEXT: mvnne r4, #0
|
||||
; CHECK-NEXT: vdup.32 d24, r2
|
||||
; CHECK-NEXT: vdup.32 d27, r4
|
||||
; CHECK-NEXT: subs r2, r2, r4
|
||||
; CHECK-NEXT: sbcs r6, r5, r6
|
||||
; CHECK-NEXT: vmov r2, r12, d19
|
||||
; CHECK-NEXT: vmov r5, r4, d21
|
||||
; CHECK-NEXT: mov r6, #0
|
||||
; CHECK-NEXT: movlt r6, #1
|
||||
; CHECK-NEXT: cmp r6, #0
|
||||
; CHECK-NEXT: mvnne r6, #0
|
||||
; CHECK-NEXT: subs r2, r5, r2
|
||||
; CHECK-NEXT: sbcs r4, r4, r12
|
||||
; CHECK-NEXT: mov r2, #0
|
||||
; CHECK-NEXT: vmov r4, r5, d23
|
||||
; CHECK-NEXT: movlt r2, #1
|
||||
; CHECK-NEXT: subs r7, r4, r7
|
||||
; CHECK-NEXT: sbcs r7, r5, lr
|
||||
; CHECK-NEXT: movlt r0, #1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: mvnne r0, #0
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: vdup.32 d25, r0
|
||||
; CHECK-NEXT: mvnne r2, #0
|
||||
; CHECK-NEXT: vdup.32 d24, r6
|
||||
; CHECK-NEXT: vdup.32 d27, r2
|
||||
; CHECK-NEXT: vbit q8, q11, q12
|
||||
; CHECK-NEXT: vdup.32 d26, r0
|
||||
; CHECK-NEXT: vdup.32 d26, r1
|
||||
; CHECK-NEXT: vbit q9, q10, q13
|
||||
; CHECK-NEXT: vst1.64 {d16, d17}, [r3:128]!
|
||||
; CHECK-NEXT: vst1.64 {d18, d19}, [r3:128]
|
||||
|
@ -142,111 +134,98 @@ define void @func_blend19(%T0_19* %loadaddr, %T0_19* %loadaddr2,
|
|||
%T1_19* %blend, %T0_19* %storeaddr) {
|
||||
; CHECK-LABEL: func_blend19:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
|
||||
; CHECK-NEXT: add r2, r1, #48
|
||||
; CHECK-NEXT: add r5, r1, #32
|
||||
; CHECK-NEXT: mov r8, #0
|
||||
; CHECK-NEXT: vld1.64 {d16, d17}, [r2:128]
|
||||
; CHECK-NEXT: add r2, r0, #48
|
||||
; CHECK-NEXT: add r6, r0, #32
|
||||
; CHECK-NEXT: mov r7, #0
|
||||
; CHECK-NEXT: mov lr, #0
|
||||
; CHECK-NEXT: vld1.64 {d18, d19}, [r2:128]
|
||||
; CHECK-NEXT: vmov.32 r12, d16[0]
|
||||
; CHECK-NEXT: vmov.32 r2, d18[0]
|
||||
; CHECK-NEXT: vmov.32 lr, d16[1]
|
||||
; CHECK-NEXT: vmov.32 r4, d18[1]
|
||||
; CHECK-NEXT: vld1.64 {d28, d29}, [r0:128]!
|
||||
; CHECK-NEXT: vld1.64 {d26, d27}, [r5:128]
|
||||
; CHECK-NEXT: vld1.64 {d30, d31}, [r6:128]
|
||||
; CHECK-NEXT: vmov.32 r5, d17[0]
|
||||
; CHECK-NEXT: vld1.64 {d22, d23}, [r0:128]
|
||||
; CHECK-NEXT: vmov.32 r0, d17[1]
|
||||
; CHECK-NEXT: vld1.64 {d24, d25}, [r1:128]!
|
||||
; CHECK-NEXT: vld1.64 {d20, d21}, [r1:128]
|
||||
; CHECK-NEXT: mov r1, #0
|
||||
; CHECK-NEXT: subs r2, r2, r12
|
||||
; CHECK-NEXT: vmov r2, r12, d16
|
||||
; CHECK-NEXT: vmov r6, r7, d17
|
||||
; CHECK-NEXT: vmov r4, r5, d18
|
||||
; CHECK-NEXT: subs r2, r4, r2
|
||||
; CHECK-NEXT: sbcs r2, r5, r12
|
||||
; CHECK-NEXT: mov r12, #0
|
||||
; CHECK-NEXT: vmov.32 r2, d19[0]
|
||||
; CHECK-NEXT: sbcs r6, r4, lr
|
||||
; CHECK-NEXT: vmov.32 r4, d24[0]
|
||||
; CHECK-NEXT: vmov.32 r6, d19[1]
|
||||
; CHECK-NEXT: vmov r2, r4, d19
|
||||
; CHECK-NEXT: movlt r12, #1
|
||||
; CHECK-NEXT: cmp r12, #0
|
||||
; CHECK-NEXT: mov r5, r1
|
||||
; CHECK-NEXT: mvnne r12, #0
|
||||
; CHECK-NEXT: subs r2, r2, r5
|
||||
; CHECK-NEXT: vmov.32 r5, d28[0]
|
||||
; CHECK-NEXT: vld1.64 {d24, d25}, [r5:128]!
|
||||
; CHECK-NEXT: vld1.64 {d20, d21}, [r5:128]
|
||||
; CHECK-NEXT: subs r2, r2, r6
|
||||
; CHECK-NEXT: mov r2, r0
|
||||
; CHECK-NEXT: add r0, r0, #32
|
||||
; CHECK-NEXT: vld1.64 {d26, d27}, [r2:128]!
|
||||
; CHECK-NEXT: vld1.64 {d22, d23}, [r2:128]
|
||||
; CHECK-NEXT: sbcs r2, r4, r7
|
||||
; CHECK-NEXT: vmov r4, r5, d21
|
||||
; CHECK-NEXT: movlt r8, #1
|
||||
; CHECK-NEXT: vmov r6, r7, d23
|
||||
; CHECK-NEXT: cmp r8, #0
|
||||
; CHECK-NEXT: mvnne r8, #0
|
||||
; CHECK-NEXT: vld1.64 {d28, d29}, [r0:128]
|
||||
; CHECK-NEXT: add r0, r1, #32
|
||||
; CHECK-NEXT: vld1.64 {d30, d31}, [r0:128]
|
||||
; CHECK-NEXT: vmov r0, r1, d20
|
||||
; CHECK-NEXT: vdup.32 d7, r8
|
||||
; CHECK-NEXT: vdup.32 d6, r12
|
||||
; CHECK-NEXT: subs r4, r6, r4
|
||||
; CHECK-NEXT: sbcs r4, r7, r5
|
||||
; CHECK-NEXT: vmov r5, r6, d24
|
||||
; CHECK-NEXT: vmov r7, r2, d26
|
||||
; CHECK-NEXT: mov r4, #0
|
||||
; CHECK-NEXT: movlt r4, #1
|
||||
; CHECK-NEXT: cmp r4, #0
|
||||
; CHECK-NEXT: mvnne r4, #0
|
||||
; CHECK-NEXT: vdup.32 d5, r4
|
||||
; CHECK-NEXT: subs r5, r7, r5
|
||||
; CHECK-NEXT: sbcs r2, r2, r6
|
||||
; CHECK-NEXT: vmov r7, r6, d27
|
||||
; CHECK-NEXT: vmov r2, r9, d25
|
||||
; CHECK-NEXT: mov r5, #0
|
||||
; CHECK-NEXT: movlt r5, #1
|
||||
; CHECK-NEXT: cmp r5, #0
|
||||
; CHECK-NEXT: mvnne r5, #0
|
||||
; CHECK-NEXT: subs r2, r7, r2
|
||||
; CHECK-NEXT: sbcs r2, r6, r9
|
||||
; CHECK-NEXT: vmov r6, r7, d22
|
||||
; CHECK-NEXT: mov r2, #0
|
||||
; CHECK-NEXT: sbcs r0, r6, r0
|
||||
; CHECK-NEXT: vmov.32 r6, d28[1]
|
||||
; CHECK-NEXT: vmov.32 r0, d24[1]
|
||||
; CHECK-NEXT: movlt r2, #1
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: mvnne r2, #0
|
||||
; CHECK-NEXT: vdup.32 d7, r2
|
||||
; CHECK-NEXT: vdup.32 d6, r12
|
||||
; CHECK-NEXT: subs r5, r5, r4
|
||||
; CHECK-NEXT: vmov.32 r4, d25[1]
|
||||
; CHECK-NEXT: vmov.32 r5, d25[0]
|
||||
; CHECK-NEXT: sbcs r0, r6, r0
|
||||
; CHECK-NEXT: vdup.32 d1, r2
|
||||
; CHECK-NEXT: vdup.32 d0, r5
|
||||
; CHECK-NEXT: vbit q12, q13, q0
|
||||
; CHECK-NEXT: subs r0, r6, r0
|
||||
; CHECK-NEXT: vmov r2, r6, d28
|
||||
; CHECK-NEXT: sbcs r0, r7, r1
|
||||
; CHECK-NEXT: mov r7, #0
|
||||
; CHECK-NEXT: vmov r0, r1, d30
|
||||
; CHECK-NEXT: movlt r7, #1
|
||||
; CHECK-NEXT: subs r0, r2, r0
|
||||
; CHECK-NEXT: vmov r2, r5, d29
|
||||
; CHECK-NEXT: sbcs r0, r6, r1
|
||||
; CHECK-NEXT: mov r6, #0
|
||||
; CHECK-NEXT: vmov.32 r0, d29[0]
|
||||
; CHECK-NEXT: vmov r0, r1, d31
|
||||
; CHECK-NEXT: movlt r6, #1
|
||||
; CHECK-NEXT: subs r0, r2, r0
|
||||
; CHECK-NEXT: sbcs r0, r5, r1
|
||||
; CHECK-NEXT: movlt lr, #1
|
||||
; CHECK-NEXT: cmp lr, #0
|
||||
; CHECK-NEXT: mvnne lr, #0
|
||||
; CHECK-NEXT: cmp r6, #0
|
||||
; CHECK-NEXT: mvnne r6, #0
|
||||
; CHECK-NEXT: subs r0, r0, r5
|
||||
; CHECK-NEXT: vmov.32 r5, d21[0]
|
||||
; CHECK-NEXT: vmov.32 r0, d29[1]
|
||||
; CHECK-NEXT: sbcs r0, r0, r4
|
||||
; CHECK-NEXT: vmov.32 r4, d23[0]
|
||||
; CHECK-NEXT: mov r0, #0
|
||||
; CHECK-NEXT: movlt r0, #1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: mvnne r0, #0
|
||||
; CHECK-NEXT: vdup.32 d1, r0
|
||||
; CHECK-NEXT: mov r0, #0
|
||||
; CHECK-NEXT: vdup.32 d0, r6
|
||||
; CHECK-NEXT: vmov.32 r6, d22[0]
|
||||
; CHECK-NEXT: vbit q12, q14, q0
|
||||
; CHECK-NEXT: subs r5, r4, r5
|
||||
; CHECK-NEXT: vmov.32 r4, d23[1]
|
||||
; CHECK-NEXT: vmov.32 r5, d21[1]
|
||||
; CHECK-NEXT: sbcs r5, r4, r5
|
||||
; CHECK-NEXT: vmov.32 r4, d20[1]
|
||||
; CHECK-NEXT: vmov.32 r5, d20[0]
|
||||
; CHECK-NEXT: movlt r0, #1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: mvnne r0, #0
|
||||
; CHECK-NEXT: vdup.32 d5, r0
|
||||
; CHECK-NEXT: add r0, r3, #32
|
||||
; CHECK-NEXT: subs r6, r6, r5
|
||||
; CHECK-NEXT: vmov.32 r5, d26[0]
|
||||
; CHECK-NEXT: vmov.32 r6, d22[1]
|
||||
; CHECK-NEXT: sbcs r6, r6, r4
|
||||
; CHECK-NEXT: mov r4, #0
|
||||
; CHECK-NEXT: vmov.32 r6, d30[0]
|
||||
; CHECK-NEXT: movlt r4, #1
|
||||
; CHECK-NEXT: subs r6, r6, r5
|
||||
; CHECK-NEXT: vmov.32 r5, d30[1]
|
||||
; CHECK-NEXT: vmov.32 r6, d26[1]
|
||||
; CHECK-NEXT: sbcs r6, r5, r6
|
||||
; CHECK-NEXT: vmov.32 r5, d31[0]
|
||||
; CHECK-NEXT: vmov.32 r6, d27[0]
|
||||
; CHECK-NEXT: movlt r1, #1
|
||||
; CHECK-NEXT: subs r6, r5, r6
|
||||
; CHECK-NEXT: vmov.32 r5, d31[1]
|
||||
; CHECK-NEXT: vmov.32 r6, d27[1]
|
||||
; CHECK-NEXT: sbcs r6, r5, r6
|
||||
; CHECK-NEXT: movlt r7, #1
|
||||
; CHECK-NEXT: vdup.32 d3, lr
|
||||
; CHECK-NEXT: vdup.32 d2, r6
|
||||
; CHECK-NEXT: cmp r7, #0
|
||||
; CHECK-NEXT: vorr q13, q1, q1
|
||||
; CHECK-NEXT: mvnne r7, #0
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: mvnne r1, #0
|
||||
; CHECK-NEXT: vdup.32 d3, r7
|
||||
; CHECK-NEXT: cmp r4, #0
|
||||
; CHECK-NEXT: vdup.32 d2, r1
|
||||
; CHECK-NEXT: mvnne r4, #0
|
||||
; CHECK-NEXT: vbit q13, q15, q1
|
||||
; CHECK-NEXT: vdup.32 d4, r4
|
||||
; CHECK-NEXT: vdup.32 d4, r7
|
||||
; CHECK-NEXT: add r0, r3, #32
|
||||
; CHECK-NEXT: vbsl q13, q14, q15
|
||||
; CHECK-NEXT: vbit q10, q11, q2
|
||||
; CHECK-NEXT: vbit q8, q9, q3
|
||||
; CHECK-NEXT: vst1.64 {d26, d27}, [r0:128]
|
||||
|
@ -254,7 +233,7 @@ define void @func_blend19(%T0_19* %loadaddr, %T0_19* %loadaddr2,
|
|||
; CHECK-NEXT: vst1.64 {d24, d25}, [r3:128]!
|
||||
; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128]
|
||||
; CHECK-NEXT: vst1.64 {d20, d21}, [r3:128]
|
||||
; CHECK-NEXT: pop {r4, r5, r6, r7, r11, lr}
|
||||
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, lr}
|
||||
; CHECK-NEXT: mov pc, lr
|
||||
%v0 = load %T0_19, %T0_19* %loadaddr
|
||||
%v1 = load %T0_19, %T0_19* %loadaddr2
|
||||
|
@ -280,202 +259,170 @@ define void @func_blend20(%T0_20* %loadaddr, %T0_20* %loadaddr2,
|
|||
; CHECK-NEXT: vpush {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: .pad #8
|
||||
; CHECK-NEXT: sub sp, sp, #8
|
||||
; CHECK-NEXT: mov r8, r1
|
||||
; CHECK-NEXT: mov r9, r0
|
||||
; CHECK-NEXT: vld1.64 {d16, d17}, [r8:128]!
|
||||
; CHECK-NEXT: add r10, r0, #64
|
||||
; CHECK-NEXT: vld1.64 {d18, d19}, [r9:128]!
|
||||
; CHECK-NEXT: vmov.32 r2, d16[0]
|
||||
; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill
|
||||
; CHECK-NEXT: vmov.32 r6, d18[0]
|
||||
; CHECK-NEXT: vmov.32 r4, d16[1]
|
||||
; CHECK-NEXT: vmov.32 r7, d18[1]
|
||||
; CHECK-NEXT: vmov.32 r5, d17[0]
|
||||
; CHECK-NEXT: subs r2, r6, r2
|
||||
; CHECK-NEXT: mov r6, #0
|
||||
; CHECK-NEXT: vmov.32 r2, d19[0]
|
||||
; CHECK-NEXT: sbcs r7, r7, r4
|
||||
; CHECK-NEXT: movlt r6, #1
|
||||
; CHECK-NEXT: vmov.32 r7, d17[1]
|
||||
; CHECK-NEXT: subs r2, r2, r5
|
||||
; CHECK-NEXT: vmov.32 r2, d19[1]
|
||||
; CHECK-NEXT: sbcs r2, r2, r7
|
||||
; CHECK-NEXT: mov r2, #0
|
||||
; CHECK-NEXT: movlt r2, #1
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: mvnne r2, #0
|
||||
; CHECK-NEXT: cmp r6, #0
|
||||
; CHECK-NEXT: vdup.32 d21, r2
|
||||
; CHECK-NEXT: mvnne r6, #0
|
||||
; CHECK-NEXT: vdup.32 d20, r6
|
||||
; CHECK-NEXT: add r9, r1, #64
|
||||
; CHECK-NEXT: mov r2, #32
|
||||
; CHECK-NEXT: add r6, r1, #64
|
||||
; CHECK-NEXT: vld1.64 {d24, d25}, [r10:128], r2
|
||||
; CHECK-NEXT: add r8, r0, #64
|
||||
; CHECK-NEXT: vld1.64 {d16, d17}, [r9:128], r2
|
||||
; CHECK-NEXT: mov r10, r1
|
||||
; CHECK-NEXT: mov r11, r0
|
||||
; CHECK-NEXT: vld1.64 {d18, d19}, [r8:128], r2
|
||||
; CHECK-NEXT: vmov r7, r5, d17
|
||||
; CHECK-NEXT: vmov r6, r2, d19
|
||||
; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill
|
||||
; CHECK-NEXT: vld1.64 {d22, d23}, [r10:128]!
|
||||
; CHECK-NEXT: subs r7, r6, r7
|
||||
; CHECK-NEXT: sbcs r2, r2, r5
|
||||
; CHECK-NEXT: vmov r5, r6, d16
|
||||
; CHECK-NEXT: vmov r7, r4, d18
|
||||
; CHECK-NEXT: mov r2, #0
|
||||
; CHECK-NEXT: movlt r2, #1
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: mvnne r2, #0
|
||||
; CHECK-NEXT: vdup.32 d21, r2
|
||||
; CHECK-NEXT: subs r5, r7, r5
|
||||
; CHECK-NEXT: sbcs r4, r4, r6
|
||||
; CHECK-NEXT: mov r4, #0
|
||||
; CHECK-NEXT: movlt r4, #1
|
||||
; CHECK-NEXT: cmp r4, #0
|
||||
; CHECK-NEXT: mvnne r4, #0
|
||||
; CHECK-NEXT: vdup.32 d20, r4
|
||||
; CHECK-NEXT: vmov r2, r4, d23
|
||||
; CHECK-NEXT: vbit q8, q9, q10
|
||||
; CHECK-NEXT: vld1.64 {d28, d29}, [r6:128], r2
|
||||
; CHECK-NEXT: vmov.32 r4, d29[0]
|
||||
; CHECK-NEXT: vmov.32 r5, d25[0]
|
||||
; CHECK-NEXT: vld1.64 {d0, d1}, [r9:128]
|
||||
; CHECK-NEXT: vld1.64 {d2, d3}, [r8:128]
|
||||
; CHECK-NEXT: vld1.64 {d22, d23}, [r6:128]!
|
||||
; CHECK-NEXT: vld1.64 {d20, d21}, [r6:128]
|
||||
; CHECK-NEXT: vmov.32 r6, d0[0]
|
||||
; CHECK-NEXT: vld1.64 {d18, d19}, [r10:128]!
|
||||
; CHECK-NEXT: vmov.32 r9, d23[0]
|
||||
; CHECK-NEXT: vmov.32 r11, d19[0]
|
||||
; CHECK-NEXT: vmov.32 r8, d23[1]
|
||||
; CHECK-NEXT: subs r4, r5, r4
|
||||
; CHECK-NEXT: vmov.32 r5, d25[1]
|
||||
; CHECK-NEXT: vmov.32 r4, d29[1]
|
||||
; CHECK-NEXT: sbcs r4, r5, r4
|
||||
; CHECK-NEXT: vmov.32 r5, d24[0]
|
||||
; CHECK-NEXT: mov r4, #0
|
||||
; CHECK-NEXT: movlt r4, #1
|
||||
; CHECK-NEXT: cmp r4, #0
|
||||
; CHECK-NEXT: mvnne r4, #0
|
||||
; CHECK-NEXT: vdup.32 d5, r4
|
||||
; CHECK-NEXT: vmov.32 r4, d28[0]
|
||||
; CHECK-NEXT: subs r4, r5, r4
|
||||
; CHECK-NEXT: vmov.32 r5, d24[1]
|
||||
; CHECK-NEXT: vmov.32 r4, d28[1]
|
||||
; CHECK-NEXT: sbcs r4, r5, r4
|
||||
; CHECK-NEXT: vmov.32 r5, d1[0]
|
||||
; CHECK-NEXT: mov r4, #0
|
||||
; CHECK-NEXT: movlt r4, #1
|
||||
; CHECK-NEXT: cmp r4, #0
|
||||
; CHECK-NEXT: mvnne r4, #0
|
||||
; CHECK-NEXT: vdup.32 d4, r4
|
||||
; CHECK-NEXT: vmov.32 r4, d3[0]
|
||||
; CHECK-NEXT: subs r4, r5, r4
|
||||
; CHECK-NEXT: vmov.32 r5, d1[1]
|
||||
; CHECK-NEXT: vmov.32 r4, d3[1]
|
||||
; CHECK-NEXT: sbcs r4, r5, r4
|
||||
; CHECK-NEXT: add r5, r1, #32
|
||||
; CHECK-NEXT: vld1.64 {d26, d27}, [r5:128]
|
||||
; CHECK-NEXT: add r5, r1, #48
|
||||
; CHECK-NEXT: mov r4, #0
|
||||
; CHECK-NEXT: add r1, r1, #80
|
||||
; CHECK-NEXT: vld1.64 {d30, d31}, [r5:128]
|
||||
; CHECK-NEXT: movlt r4, #1
|
||||
; CHECK-NEXT: cmp r4, #0
|
||||
; CHECK-NEXT: vbif q12, q14, q2
|
||||
; CHECK-NEXT: vmov.32 r5, d2[0]
|
||||
; CHECK-NEXT: mvnne r4, #0
|
||||
; CHECK-NEXT: vdup.32 d29, r4
|
||||
; CHECK-NEXT: vmov.32 r4, d31[1]
|
||||
; CHECK-NEXT: subs r5, r6, r5
|
||||
; CHECK-NEXT: vmov.32 r6, d0[1]
|
||||
; CHECK-NEXT: vmov.32 r5, d2[1]
|
||||
; CHECK-NEXT: sbcs r5, r6, r5
|
||||
; CHECK-NEXT: add r6, r0, #48
|
||||
; CHECK-NEXT: mov r5, #0
|
||||
; CHECK-NEXT: vld1.64 {d6, d7}, [r6:128]
|
||||
; CHECK-NEXT: movlt r5, #1
|
||||
; CHECK-NEXT: cmp r5, #0
|
||||
; CHECK-NEXT: mvnne r5, #0
|
||||
; CHECK-NEXT: vmov.32 r7, d7[0]
|
||||
; CHECK-NEXT: vdup.32 d28, r5
|
||||
; CHECK-NEXT: vmov.32 r5, d31[0]
|
||||
; CHECK-NEXT: vbsl q14, q0, q1
|
||||
; CHECK-NEXT: vmov.32 r6, d7[1]
|
||||
; CHECK-NEXT: vmov.32 r2, d6[0]
|
||||
; CHECK-NEXT: subs r5, r7, r5
|
||||
; CHECK-NEXT: vmov.32 r7, d6[1]
|
||||
; CHECK-NEXT: sbcs r4, r6, r4
|
||||
; CHECK-NEXT: vmov.32 r6, d30[0]
|
||||
; CHECK-NEXT: vmov.32 r5, d30[1]
|
||||
; CHECK-NEXT: mov r4, #0
|
||||
; CHECK-NEXT: movlt r4, #1
|
||||
; CHECK-NEXT: cmp r4, #0
|
||||
; CHECK-NEXT: mvnne r4, #0
|
||||
; CHECK-NEXT: vdup.32 d3, r4
|
||||
; CHECK-NEXT: vmov.32 r4, d26[1]
|
||||
; CHECK-NEXT: subs r2, r2, r6
|
||||
; CHECK-NEXT: sbcs r2, r7, r5
|
||||
; CHECK-NEXT: add r5, r0, #32
|
||||
; CHECK-NEXT: mov r2, #0
|
||||
; CHECK-NEXT: vld1.64 {d0, d1}, [r5:128]
|
||||
; CHECK-NEXT: movlt r2, #1
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: mvnne r2, #0
|
||||
; CHECK-NEXT: vmov.32 r6, d0[0]
|
||||
; CHECK-NEXT: vdup.32 d2, r2
|
||||
; CHECK-NEXT: add r0, r0, #80
|
||||
; CHECK-NEXT: vmov.32 r2, d26[0]
|
||||
; CHECK-NEXT: vbit q15, q3, q1
|
||||
; CHECK-NEXT: vmov.32 r5, d0[1]
|
||||
; CHECK-NEXT: vmov.32 r7, d1[0]
|
||||
; CHECK-NEXT: vld1.64 {d2, d3}, [r10:128]
|
||||
; CHECK-NEXT: vld1.64 {d6, d7}, [r1:128]
|
||||
; CHECK-NEXT: vld1.64 {d8, d9}, [r0:128]
|
||||
; CHECK-NEXT: vmov.32 r1, d7[1]
|
||||
; CHECK-NEXT: vmov.32 r10, d19[1]
|
||||
; CHECK-NEXT: vmov.32 lr, d6[0]
|
||||
; CHECK-NEXT: vmov.32 r3, d8[0]
|
||||
; CHECK-NEXT: vmov.32 r12, d8[1]
|
||||
; CHECK-NEXT: subs r2, r6, r2
|
||||
; CHECK-NEXT: vmov.32 r6, d1[1]
|
||||
; CHECK-NEXT: vld1.64 {d18, d19}, [r11:128]!
|
||||
; CHECK-NEXT: vmov r7, r5, d19
|
||||
; CHECK-NEXT: subs r2, r7, r2
|
||||
; CHECK-NEXT: sbcs r2, r5, r4
|
||||
; CHECK-NEXT: vmov.32 r5, d27[0]
|
||||
; CHECK-NEXT: vmov.32 r4, d27[1]
|
||||
; CHECK-NEXT: vmov r5, r7, d18
|
||||
; CHECK-NEXT: mov r2, #0
|
||||
; CHECK-NEXT: movlt r2, #1
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: mvnne r2, #0
|
||||
; CHECK-NEXT: subs r5, r7, r5
|
||||
; CHECK-NEXT: vmov.32 r7, d7[0]
|
||||
; CHECK-NEXT: sbcs r4, r6, r4
|
||||
; CHECK-NEXT: vmov.32 r6, d2[0]
|
||||
; CHECK-NEXT: vdup.32 d21, r2
|
||||
; CHECK-NEXT: vmov r2, r4, d22
|
||||
; CHECK-NEXT: subs r2, r5, r2
|
||||
; CHECK-NEXT: sbcs r2, r7, r4
|
||||
; CHECK-NEXT: mov r2, #0
|
||||
; CHECK-NEXT: movlt r2, #1
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: mvnne r2, #0
|
||||
; CHECK-NEXT: vdup.32 d20, r2
|
||||
; CHECK-NEXT: add r2, r0, #48
|
||||
; CHECK-NEXT: vbif q9, q11, q10
|
||||
; CHECK-NEXT: vld1.64 {d30, d31}, [r2:128]
|
||||
; CHECK-NEXT: add r2, r1, #48
|
||||
; CHECK-NEXT: vld1.64 {d2, d3}, [r2:128]
|
||||
; CHECK-NEXT: vmov r5, r7, d30
|
||||
; CHECK-NEXT: vmov r2, r4, d2
|
||||
; CHECK-NEXT: vld1.64 {d26, d27}, [r11:128]
|
||||
; CHECK-NEXT: vld1.64 {d0, d1}, [r10:128]
|
||||
; CHECK-NEXT: vld1.64 {d24, d25}, [r9:128]!
|
||||
; CHECK-NEXT: vld1.64 {d22, d23}, [r9:128]
|
||||
; CHECK-NEXT: vld1.64 {d20, d21}, [r8:128]!
|
||||
; CHECK-NEXT: vmov r11, r10, d21
|
||||
; CHECK-NEXT: subs r2, r5, r2
|
||||
; CHECK-NEXT: sbcs r2, r7, r4
|
||||
; CHECK-NEXT: vmov r7, r6, d31
|
||||
; CHECK-NEXT: vmov r2, r5, d3
|
||||
; CHECK-NEXT: mov r4, #0
|
||||
; CHECK-NEXT: movlt r4, #1
|
||||
; CHECK-NEXT: cmp r4, #0
|
||||
; CHECK-NEXT: mvnne r4, #0
|
||||
; CHECK-NEXT: subs r2, r7, r2
|
||||
; CHECK-NEXT: mov r7, #0
|
||||
; CHECK-NEXT: sbcs r2, r6, r5
|
||||
; CHECK-NEXT: vmov r6, r5, d27
|
||||
; CHECK-NEXT: vmov r2, r9, d1
|
||||
; CHECK-NEXT: movlt r7, #1
|
||||
; CHECK-NEXT: cmp r7, #0
|
||||
; CHECK-NEXT: mvnne r7, #0
|
||||
; CHECK-NEXT: vdup.32 d7, r7
|
||||
; CHECK-NEXT: vdup.32 d6, r4
|
||||
; CHECK-NEXT: subs r2, r6, r2
|
||||
; CHECK-NEXT: sbcs r2, r5, r9
|
||||
; CHECK-NEXT: vmov r6, r5, d26
|
||||
; CHECK-NEXT: mov r2, #0
|
||||
; CHECK-NEXT: movlt r2, #1
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: mvnne r2, #0
|
||||
; CHECK-NEXT: vdup.32 d5, r2
|
||||
; CHECK-NEXT: vmov r2, r9, d0
|
||||
; CHECK-NEXT: subs r2, r6, r2
|
||||
; CHECK-NEXT: sbcs r2, r5, r9
|
||||
; CHECK-NEXT: mov r2, #0
|
||||
; CHECK-NEXT: movlt r2, #1
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: mvnne r2, #0
|
||||
; CHECK-NEXT: vdup.32 d4, r2
|
||||
; CHECK-NEXT: add r2, r1, #32
|
||||
; CHECK-NEXT: vld1.64 {d28, d29}, [r2:128]
|
||||
; CHECK-NEXT: add r2, r0, #32
|
||||
; CHECK-NEXT: vbif q13, q0, q2
|
||||
; CHECK-NEXT: add r1, r1, #80
|
||||
; CHECK-NEXT: vld1.64 {d0, d1}, [r2:128]
|
||||
; CHECK-NEXT: vmov r4, r5, d28
|
||||
; CHECK-NEXT: vbif q15, q1, q3
|
||||
; CHECK-NEXT: add r0, r0, #80
|
||||
; CHECK-NEXT: vmov r2, r6, d0
|
||||
; CHECK-NEXT: vld1.64 {d2, d3}, [r8:128]
|
||||
; CHECK-NEXT: vmov r9, r8, d25
|
||||
; CHECK-NEXT: vld1.64 {d8, d9}, [r0:128]
|
||||
; CHECK-NEXT: vld1.64 {d6, d7}, [r1:128]
|
||||
; CHECK-NEXT: vmov r3, r12, d8
|
||||
; CHECK-NEXT: subs r2, r2, r4
|
||||
; CHECK-NEXT: sbcs r2, r6, r5
|
||||
; CHECK-NEXT: vmov r4, r5, d29
|
||||
; CHECK-NEXT: vmov r6, r7, d1
|
||||
; CHECK-NEXT: mov r2, #0
|
||||
; CHECK-NEXT: movlt r2, #1
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: mvnne r2, #0
|
||||
; CHECK-NEXT: subs r4, r6, r4
|
||||
; CHECK-NEXT: sbcs r4, r7, r5
|
||||
; CHECK-NEXT: vmov r5, r6, d2
|
||||
; CHECK-NEXT: mov r4, #0
|
||||
; CHECK-NEXT: vmov.32 r5, d2[1]
|
||||
; CHECK-NEXT: movlt r4, #1
|
||||
; CHECK-NEXT: cmp r4, #0
|
||||
; CHECK-NEXT: mvnne r4, #0
|
||||
; CHECK-NEXT: vdup.32 d5, r4
|
||||
; CHECK-NEXT: vdup.32 d4, r2
|
||||
; CHECK-NEXT: vmov.32 r2, d20[0]
|
||||
; CHECK-NEXT: vbit q13, q0, q2
|
||||
; CHECK-NEXT: vmov.32 r4, d20[1]
|
||||
; CHECK-NEXT: subs r0, r6, r2
|
||||
; CHECK-NEXT: vmov.32 r2, d9[1]
|
||||
; CHECK-NEXT: sbcs r0, r5, r4
|
||||
; CHECK-NEXT: vmov.32 r4, d9[0]
|
||||
; CHECK-NEXT: mov r0, #0
|
||||
; CHECK-NEXT: vmov.32 r6, d18[0]
|
||||
; CHECK-NEXT: movlt r0, #1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: mvnne r0, #0
|
||||
; CHECK-NEXT: vmov.32 r5, d18[1]
|
||||
; CHECK-NEXT: subs r4, r4, r7
|
||||
; CHECK-NEXT: vmov.32 r7, d21[1]
|
||||
; CHECK-NEXT: sbcs r1, r2, r1
|
||||
; CHECK-NEXT: vmov.32 r4, d22[1]
|
||||
; CHECK-NEXT: vmov.32 r1, d22[0]
|
||||
; CHECK-NEXT: vmov r2, r4, d22
|
||||
; CHECK-NEXT: vbit q14, q0, q2
|
||||
; CHECK-NEXT: subs r2, r5, r2
|
||||
; CHECK-NEXT: sbcs r2, r6, r4
|
||||
; CHECK-NEXT: vmov r4, r5, d24
|
||||
; CHECK-NEXT: vmov r6, r7, d20
|
||||
; CHECK-NEXT: mov r2, #0
|
||||
; CHECK-NEXT: movlt r2, #1
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: mvnne r2, #0
|
||||
; CHECK-NEXT: vdup.32 d11, r2
|
||||
; CHECK-NEXT: vmov.32 r2, d3[1]
|
||||
; CHECK-NEXT: subs r1, r6, r1
|
||||
; CHECK-NEXT: vmov.32 r6, d21[0]
|
||||
; CHECK-NEXT: sbcs r1, r5, r4
|
||||
; CHECK-NEXT: vmov.32 r4, d3[0]
|
||||
; CHECK-NEXT: vmov.32 r5, d6[1]
|
||||
; CHECK-NEXT: subs r1, r6, r4
|
||||
; CHECK-NEXT: vmov r0, r6, d9
|
||||
; CHECK-NEXT: sbcs r1, r7, r5
|
||||
; CHECK-NEXT: vmov r4, r5, d7
|
||||
; CHECK-NEXT: mov r1, #0
|
||||
; CHECK-NEXT: movlt r1, #1
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: mvnne r1, #0
|
||||
; CHECK-NEXT: subs r4, r4, r6
|
||||
; CHECK-NEXT: sbcs r2, r2, r7
|
||||
; CHECK-NEXT: mov r2, #0
|
||||
; CHECK-NEXT: movlt r2, #1
|
||||
; CHECK-NEXT: subs r0, r0, r4
|
||||
; CHECK-NEXT: vmov r7, r4, d23
|
||||
; CHECK-NEXT: sbcs r0, r6, r5
|
||||
; CHECK-NEXT: vmov r5, lr, d6
|
||||
; CHECK-NEXT: mov r0, #0
|
||||
; CHECK-NEXT: movlt r0, #1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: mvnne r0, #0
|
||||
; CHECK-NEXT: vdup.32 d11, r0
|
||||
; CHECK-NEXT: vmov r0, r6, d3
|
||||
; CHECK-NEXT: subs r0, r0, r7
|
||||
; CHECK-NEXT: sbcs r0, r6, r4
|
||||
; CHECK-NEXT: mov r0, #0
|
||||
; CHECK-NEXT: movlt r0, #1
|
||||
; CHECK-NEXT: subs r4, r11, r9
|
||||
; CHECK-NEXT: sbcs r4, r10, r8
|
||||
; CHECK-NEXT: mov r4, #0
|
||||
; CHECK-NEXT: movlt r4, #1
|
||||
; CHECK-NEXT: subs r3, r3, lr
|
||||
; CHECK-NEXT: sbcs r3, r12, r5
|
||||
; CHECK-NEXT: subs r3, r3, r5
|
||||
; CHECK-NEXT: sbcs r3, r12, lr
|
||||
; CHECK-NEXT: mov r3, #0
|
||||
; CHECK-NEXT: movlt r3, #1
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
|
@ -485,28 +432,28 @@ define void @func_blend20(%T0_20* %loadaddr, %T0_20* %loadaddr2,
|
|||
; CHECK-NEXT: vdup.32 d10, r3
|
||||
; CHECK-NEXT: vdup.32 d1, r4
|
||||
; CHECK-NEXT: vorr q2, q5, q5
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: vdup.32 d0, r1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: vbsl q2, q4, q3
|
||||
; CHECK-NEXT: mvnne r2, #0
|
||||
; CHECK-NEXT: vbif q9, q11, q0
|
||||
; CHECK-NEXT: mvnne r0, #0
|
||||
; CHECK-NEXT: vbif q10, q12, q0
|
||||
; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
|
||||
; CHECK-NEXT: vdup.32 d7, r2
|
||||
; CHECK-NEXT: vdup.32 d6, r0
|
||||
; CHECK-NEXT: vdup.32 d7, r0
|
||||
; CHECK-NEXT: add r0, r1, #80
|
||||
; CHECK-NEXT: vbit q10, q1, q3
|
||||
; CHECK-NEXT: vdup.32 d6, r2
|
||||
; CHECK-NEXT: vbit q11, q1, q3
|
||||
; CHECK-NEXT: vst1.64 {d4, d5}, [r0:128]
|
||||
; CHECK-NEXT: add r0, r1, #32
|
||||
; CHECK-NEXT: vst1.64 {d26, d27}, [r0:128]
|
||||
; CHECK-NEXT: vst1.64 {d28, d29}, [r0:128]
|
||||
; CHECK-NEXT: add r0, r1, #48
|
||||
; CHECK-NEXT: vst1.64 {d30, d31}, [r0:128]
|
||||
; CHECK-NEXT: add r0, r1, #64
|
||||
; CHECK-NEXT: vst1.64 {d16, d17}, [r1:128]!
|
||||
; CHECK-NEXT: vst1.64 {d28, d29}, [r1:128]
|
||||
; CHECK-NEXT: vst1.64 {d18, d19}, [r1:128]!
|
||||
; CHECK-NEXT: vst1.64 {d26, d27}, [r1:128]
|
||||
; CHECK-NEXT: mov r1, #32
|
||||
; CHECK-NEXT: vst1.64 {d24, d25}, [r0:128], r1
|
||||
; CHECK-NEXT: vst1.64 {d18, d19}, [r0:128]!
|
||||
; CHECK-NEXT: vst1.64 {d20, d21}, [r0:128]
|
||||
; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128], r1
|
||||
; CHECK-NEXT: vst1.64 {d20, d21}, [r0:128]!
|
||||
; CHECK-NEXT: vst1.64 {d22, d23}, [r0:128]
|
||||
; CHECK-NEXT: add sp, sp, #8
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: add sp, sp, #4
|
||||
|
|
|
@ -1298,19 +1298,21 @@ for.body: ; preds = %for.body.preheader1
|
|||
define arm_aapcs_vfpcc void @half_short_mul(half* nocapture readonly %a, i16* nocapture readonly %b, float* nocapture %c, i32 %N) {
|
||||
; CHECK-LABEL: half_short_mul:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
|
||||
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
|
||||
; CHECK-NEXT: sub sp, #4
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: beq .LBB8_8
|
||||
; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
|
||||
; CHECK-NEXT: cmp r3, #3
|
||||
; CHECK-NEXT: bhi .LBB8_3
|
||||
; CHECK-NEXT: @ %bb.2:
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: movs r7, #0
|
||||
; CHECK-NEXT: b .LBB8_6
|
||||
; CHECK-NEXT: .LBB8_3: @ %vector.ph
|
||||
; CHECK-NEXT: bic r12, r3, #3
|
||||
; CHECK-NEXT: bic r7, r3, #3
|
||||
; CHECK-NEXT: str r7, [sp] @ 4-byte Spill
|
||||
; CHECK-NEXT: subs r6, r7, #4
|
||||
; CHECK-NEXT: movs r5, #1
|
||||
; CHECK-NEXT: sub.w r6, r12, #4
|
||||
; CHECK-NEXT: mov r4, r0
|
||||
; CHECK-NEXT: add.w lr, r5, r6, lsr #2
|
||||
; CHECK-NEXT: mov r5, r1
|
||||
|
@ -1319,19 +1321,17 @@ define arm_aapcs_vfpcc void @half_short_mul(half* nocapture readonly %a, i16* no
|
|||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vldrh.u32 q0, [r5], #8
|
||||
; CHECK-NEXT: ldr.w r9, [r4]
|
||||
; CHECK-NEXT: ldr.w r8, [r4, #4]
|
||||
; CHECK-NEXT: ldr.w r10, [r4, #4]
|
||||
; CHECK-NEXT: adds r4, #8
|
||||
; CHECK-NEXT: vmov r7, s0
|
||||
; CHECK-NEXT: vmov.16 q1[0], r7
|
||||
; CHECK-NEXT: vmov r7, s1
|
||||
; CHECK-NEXT: vmov.16 q1[1], r7
|
||||
; CHECK-NEXT: vmov r7, s2
|
||||
; CHECK-NEXT: vmov.16 q1[2], r7
|
||||
; CHECK-NEXT: vmov r7, s3
|
||||
; CHECK-NEXT: vmov.16 q1[3], r7
|
||||
; CHECK-NEXT: vcvt.f16.s16 q0, q1
|
||||
; CHECK-NEXT: vmov r7, r12, d0
|
||||
; CHECK-NEXT: vmov.32 q1[0], r9
|
||||
; CHECK-NEXT: vmov.32 q1[1], r8
|
||||
; CHECK-NEXT: vmov r11, r8, d1
|
||||
; CHECK-NEXT: vmov.16 q0[0], r7
|
||||
; CHECK-NEXT: vmov.16 q0[1], r12
|
||||
; CHECK-NEXT: vmov.32 q1[1], r10
|
||||
; CHECK-NEXT: vmov.16 q0[2], r11
|
||||
; CHECK-NEXT: vmov.16 q0[3], r8
|
||||
; CHECK-NEXT: vcvt.f16.s16 q0, q0
|
||||
; CHECK-NEXT: vmul.f16 q0, q1, q0
|
||||
; CHECK-NEXT: vcvtt.f32.f16 s7, s1
|
||||
; CHECK-NEXT: vcvtb.f32.f16 s6, s1
|
||||
|
@ -1340,13 +1340,14 @@ define arm_aapcs_vfpcc void @half_short_mul(half* nocapture readonly %a, i16* no
|
|||
; CHECK-NEXT: vstrb.8 q1, [r6], #16
|
||||
; CHECK-NEXT: le lr, .LBB8_4
|
||||
; CHECK-NEXT: @ %bb.5: @ %middle.block
|
||||
; CHECK-NEXT: cmp r12, r3
|
||||
; CHECK-NEXT: ldr r7, [sp] @ 4-byte Reload
|
||||
; CHECK-NEXT: cmp r7, r3
|
||||
; CHECK-NEXT: beq .LBB8_8
|
||||
; CHECK-NEXT: .LBB8_6: @ %for.body.preheader13
|
||||
; CHECK-NEXT: sub.w lr, r3, r12
|
||||
; CHECK-NEXT: add.w r0, r0, r12, lsl #1
|
||||
; CHECK-NEXT: add.w r1, r1, r12, lsl #1
|
||||
; CHECK-NEXT: add.w r2, r2, r12, lsl #2
|
||||
; CHECK-NEXT: sub.w lr, r3, r7
|
||||
; CHECK-NEXT: add.w r0, r0, r7, lsl #1
|
||||
; CHECK-NEXT: add.w r1, r1, r7, lsl #1
|
||||
; CHECK-NEXT: add.w r2, r2, r7, lsl #2
|
||||
; CHECK-NEXT: .LBB8_7: @ %for.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: ldrsh r3, [r1], #2
|
||||
|
@ -1359,7 +1360,8 @@ define arm_aapcs_vfpcc void @half_short_mul(half* nocapture readonly %a, i16* no
|
|||
; CHECK-NEXT: vstmia r2!, {s0}
|
||||
; CHECK-NEXT: le lr, .LBB8_7
|
||||
; CHECK-NEXT: .LBB8_8: @ %for.cond.cleanup
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
|
||||
; CHECK-NEXT: add sp, #4
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||
entry:
|
||||
%cmp10 = icmp eq i32 %N, 0
|
||||
br i1 %cmp10, label %for.cond.cleanup, label %for.body.preheader
|
||||
|
|
|
@ -116,51 +116,43 @@ define <8 x i16> @v8i16(i32 %index, i32 %TC, <8 x i16> %V1, <8 x i16> %V2) {
|
|||
; CHECK-NEXT: vadd.i32 q3, q0, r0
|
||||
; CHECK-NEXT: vcmp.u32 hi, q5, q3
|
||||
; CHECK-NEXT: vpsel q4, q2, q1
|
||||
; CHECK-NEXT: vmov r1, s16
|
||||
; CHECK-NEXT: vmov r1, r12, d8
|
||||
; CHECK-NEXT: vmov.16 q0[0], r1
|
||||
; CHECK-NEXT: vmov r1, s17
|
||||
; CHECK-NEXT: vmov.16 q0[1], r1
|
||||
; CHECK-NEXT: vmov r1, s18
|
||||
; CHECK-NEXT: vmov.16 q0[1], r12
|
||||
; CHECK-NEXT: vmov r1, r12, d9
|
||||
; CHECK-NEXT: vmov.16 q0[2], r1
|
||||
; CHECK-NEXT: vmov r1, s19
|
||||
; CHECK-NEXT: vmov.16 q0[3], r1
|
||||
; CHECK-NEXT: adr r1, .LCPI2_1
|
||||
; CHECK-NEXT: vldrw.u32 q4, [r1]
|
||||
; CHECK-NEXT: vmov.16 q0[3], r12
|
||||
; CHECK-NEXT: vadd.i32 q4, q4, r0
|
||||
; CHECK-NEXT: vcmp.u32 hi, q5, q4
|
||||
; CHECK-NEXT: vpsel q5, q2, q1
|
||||
; CHECK-NEXT: vmov r1, s20
|
||||
; CHECK-NEXT: vmov r1, r12, d10
|
||||
; CHECK-NEXT: vmov.16 q0[4], r1
|
||||
; CHECK-NEXT: vmov r1, s21
|
||||
; CHECK-NEXT: vmov.16 q0[5], r1
|
||||
; CHECK-NEXT: vmov r1, s22
|
||||
; CHECK-NEXT: vmov.16 q0[6], r1
|
||||
; CHECK-NEXT: vmov r1, s23
|
||||
; CHECK-NEXT: vmov.16 q0[5], r12
|
||||
; CHECK-NEXT: vmov r1, r12, d11
|
||||
; CHECK-NEXT: vdup.32 q5, r0
|
||||
; CHECK-NEXT: vmov.16 q0[7], r1
|
||||
; CHECK-NEXT: vmov.16 q0[6], r1
|
||||
; CHECK-NEXT: vcmp.u32 hi, q5, q3
|
||||
; CHECK-NEXT: vmov.16 q0[7], r12
|
||||
; CHECK-NEXT: vpsel q6, q2, q1
|
||||
; CHECK-NEXT: vcmp.u32 hi, q5, q4
|
||||
; CHECK-NEXT: vmov r0, s24
|
||||
; CHECK-NEXT: vmov r0, r1, d12
|
||||
; CHECK-NEXT: vpsel q1, q2, q1
|
||||
; CHECK-NEXT: vmov.16 q3[0], r0
|
||||
; CHECK-NEXT: vmov r0, s25
|
||||
; CHECK-NEXT: vmov.16 q3[1], r0
|
||||
; CHECK-NEXT: vmov r0, s26
|
||||
; CHECK-NEXT: vmov.16 q3[1], r1
|
||||
; CHECK-NEXT: vmov r0, r1, d13
|
||||
; CHECK-NEXT: vmov.16 q3[2], r0
|
||||
; CHECK-NEXT: vmov r0, s27
|
||||
; CHECK-NEXT: vmov.16 q3[3], r0
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov.16 q3[3], r1
|
||||
; CHECK-NEXT: vmov r0, r1, d2
|
||||
; CHECK-NEXT: vmov.16 q3[4], r0
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov.16 q3[5], r0
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov.16 q3[5], r1
|
||||
; CHECK-NEXT: vmov r0, r1, d3
|
||||
; CHECK-NEXT: vmov.16 q3[6], r0
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov.16 q3[7], r0
|
||||
; CHECK-NEXT: add r0, sp, #56
|
||||
; CHECK-NEXT: vcmp.i16 ne, q3, zr
|
||||
; CHECK-NEXT: vmov.16 q3[7], r1
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
||||
; CHECK-NEXT: vcmp.i16 ne, q3, zr
|
||||
; CHECK-NEXT: vpnot
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vcmpt.i16 ne, q0, zr
|
||||
|
@ -201,27 +193,23 @@ define <16 x i8> @v16i8(i32 %index, i32 %TC, <16 x i8> %V1, <16 x i8> %V2) {
|
|||
; CHECK-NEXT: vadd.i32 q1, q0, r0
|
||||
; CHECK-NEXT: vcmp.u32 hi, q7, q1
|
||||
; CHECK-NEXT: vpsel q0, q4, q5
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov r1, r12, d0
|
||||
; CHECK-NEXT: vmov.16 q2[0], r1
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov.16 q2[1], r1
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov.16 q2[1], r12
|
||||
; CHECK-NEXT: vmov r1, r12, d1
|
||||
; CHECK-NEXT: vmov.16 q2[2], r1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov.16 q2[3], r1
|
||||
; CHECK-NEXT: adr r1, .LCPI3_1
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r1]
|
||||
; CHECK-NEXT: vmov.16 q2[3], r12
|
||||
; CHECK-NEXT: vadd.i32 q3, q0, r0
|
||||
; CHECK-NEXT: vcmp.u32 hi, q7, q3
|
||||
; CHECK-NEXT: vpsel q0, q4, q5
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov r1, r12, d0
|
||||
; CHECK-NEXT: vmov.16 q2[4], r1
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov.16 q2[5], r1
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov.16 q2[5], r12
|
||||
; CHECK-NEXT: vmov r1, r12, d1
|
||||
; CHECK-NEXT: vmov.16 q2[6], r1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov.16 q2[7], r1
|
||||
; CHECK-NEXT: vmov.16 q2[7], r12
|
||||
; CHECK-NEXT: vcmp.i16 ne, q2, zr
|
||||
; CHECK-NEXT: vpsel q0, q4, q5
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[0]
|
||||
|
@ -246,28 +234,24 @@ define <16 x i8> @v16i8(i32 %index, i32 %TC, <16 x i8> %V1, <16 x i8> %V2) {
|
|||
; CHECK-NEXT: vcmp.u32 hi, q7, q0
|
||||
; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill
|
||||
; CHECK-NEXT: vpsel q6, q4, q5
|
||||
; CHECK-NEXT: vmov r1, s24
|
||||
; CHECK-NEXT: vmov r1, r12, d12
|
||||
; CHECK-NEXT: vmov.16 q0[0], r1
|
||||
; CHECK-NEXT: vmov r1, s25
|
||||
; CHECK-NEXT: vmov.16 q0[1], r1
|
||||
; CHECK-NEXT: vmov r1, s26
|
||||
; CHECK-NEXT: vmov.16 q0[1], r12
|
||||
; CHECK-NEXT: vmov r1, r12, d13
|
||||
; CHECK-NEXT: vmov.16 q0[2], r1
|
||||
; CHECK-NEXT: vmov r1, s27
|
||||
; CHECK-NEXT: vmov.16 q0[3], r1
|
||||
; CHECK-NEXT: adr r1, .LCPI3_3
|
||||
; CHECK-NEXT: vldrw.u32 q6, [r1]
|
||||
; CHECK-NEXT: vmov.16 q0[3], r12
|
||||
; CHECK-NEXT: vadd.i32 q6, q6, r0
|
||||
; CHECK-NEXT: vcmp.u32 hi, q7, q6
|
||||
; CHECK-NEXT: vpsel q7, q4, q5
|
||||
; CHECK-NEXT: vmov r1, s28
|
||||
; CHECK-NEXT: vmov r1, r12, d14
|
||||
; CHECK-NEXT: vmov.16 q0[4], r1
|
||||
; CHECK-NEXT: vmov r1, s29
|
||||
; CHECK-NEXT: vmov.16 q0[5], r1
|
||||
; CHECK-NEXT: vmov r1, s30
|
||||
; CHECK-NEXT: vmov.16 q0[5], r12
|
||||
; CHECK-NEXT: vmov r1, r12, d15
|
||||
; CHECK-NEXT: vmov.16 q0[6], r1
|
||||
; CHECK-NEXT: vmov r1, s31
|
||||
; CHECK-NEXT: vmov.16 q0[7], r1
|
||||
; CHECK-NEXT: vdup.32 q7, r0
|
||||
; CHECK-NEXT: vmov.16 q0[7], r12
|
||||
; CHECK-NEXT: vcmp.i16 ne, q0, zr
|
||||
; CHECK-NEXT: vpsel q0, q4, q5
|
||||
; CHECK-NEXT: vcmp.u32 hi, q7, q1
|
||||
|
@ -285,27 +269,23 @@ define <16 x i8> @v16i8(i32 %index, i32 %TC, <16 x i8> %V1, <16 x i8> %V2) {
|
|||
; CHECK-NEXT: vmov.u16 r1, q0[5]
|
||||
; CHECK-NEXT: vmov.8 q2[13], r1
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[6]
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov.8 q2[14], r1
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[7]
|
||||
; CHECK-NEXT: vmov.16 q0[0], r0
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vcmp.u32 hi, q7, q3
|
||||
; CHECK-NEXT: vmov.16 q0[1], r0
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov.16 q0[2], r0
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vpsel q1, q4, q5
|
||||
; CHECK-NEXT: vmov.16 q0[3], r0
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov.8 q2[15], r1
|
||||
; CHECK-NEXT: vmov r0, r1, d2
|
||||
; CHECK-NEXT: vmov.16 q0[0], r0
|
||||
; CHECK-NEXT: vcmp.u32 hi, q7, q3
|
||||
; CHECK-NEXT: vmov.16 q0[1], r1
|
||||
; CHECK-NEXT: vmov r0, r1, d3
|
||||
; CHECK-NEXT: vmov.16 q0[2], r0
|
||||
; CHECK-NEXT: vpsel q1, q4, q5
|
||||
; CHECK-NEXT: vmov.16 q0[3], r1
|
||||
; CHECK-NEXT: vmov r0, r1, d2
|
||||
; CHECK-NEXT: vmov.16 q0[4], r0
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov.16 q0[5], r0
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov.16 q0[5], r1
|
||||
; CHECK-NEXT: vmov r0, r1, d3
|
||||
; CHECK-NEXT: vmov.16 q0[6], r0
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov.16 q0[7], r0
|
||||
; CHECK-NEXT: vmov.16 q0[7], r1
|
||||
; CHECK-NEXT: vcmp.i16 ne, q0, zr
|
||||
; CHECK-NEXT: vpsel q0, q4, q5
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[0]
|
||||
|
@ -328,23 +308,19 @@ define <16 x i8> @v16i8(i32 %index, i32 %TC, <16 x i8> %V1, <16 x i8> %V2) {
|
|||
; CHECK-NEXT: vcmp.u32 hi, q7, q0
|
||||
; CHECK-NEXT: vpsel q1, q4, q5
|
||||
; CHECK-NEXT: vcmp.u32 hi, q7, q6
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r0, r1, d2
|
||||
; CHECK-NEXT: vmov.16 q0[0], r0
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov.16 q0[1], r0
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov.16 q0[1], r1
|
||||
; CHECK-NEXT: vmov r0, r1, d3
|
||||
; CHECK-NEXT: vmov.16 q0[2], r0
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vpsel q1, q4, q5
|
||||
; CHECK-NEXT: vmov.16 q0[3], r0
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov.16 q0[3], r1
|
||||
; CHECK-NEXT: vmov r0, r1, d2
|
||||
; CHECK-NEXT: vmov.16 q0[4], r0
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov.16 q0[5], r0
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov.16 q0[5], r1
|
||||
; CHECK-NEXT: vmov r0, r1, d3
|
||||
; CHECK-NEXT: vmov.16 q0[6], r0
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov.16 q0[7], r0
|
||||
; CHECK-NEXT: vmov.16 q0[7], r1
|
||||
; CHECK-NEXT: vcmp.i16 ne, q0, zr
|
||||
; CHECK-NEXT: vpsel q0, q4, q5
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[0]
|
||||
|
@ -423,50 +399,45 @@ define void @test_width2(i32* nocapture readnone %x, i32* nocapture %y, i8 zeroe
|
|||
; CHECK-NEXT: vmov.i64 q0, #0xffffffff
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r2]
|
||||
; CHECK-NEXT: add.w lr, r3, r0, lsr #1
|
||||
; CHECK-NEXT: mov.w r8, #0
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: vand q1, q1, q0
|
||||
; CHECK-NEXT: .LBB4_2: @ %vector.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r8, r8
|
||||
; CHECK-NEXT: vmov r7, s6
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r12, r12
|
||||
; CHECK-NEXT: vmov r6, r7, d3
|
||||
; CHECK-NEXT: vand q3, q3, q0
|
||||
; CHECK-NEXT: vmov r6, s7
|
||||
; CHECK-NEXT: vmov r3, s14
|
||||
; CHECK-NEXT: add.w r8, r8, #2
|
||||
; CHECK-NEXT: add.w r12, r12, #2
|
||||
; CHECK-NEXT: vmov r2, r3, d7
|
||||
; CHECK-NEXT: vmov r9, s12
|
||||
; CHECK-NEXT: vmov r2, s15
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: adds r3, #1
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r9, r3
|
||||
; CHECK-NEXT: adds r0, r2, #1
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r9, r0
|
||||
; CHECK-NEXT: adc r8, r3, #0
|
||||
; CHECK-NEXT: vand q3, q3, q0
|
||||
; CHECK-NEXT: adc r12, r2, #0
|
||||
; CHECK-NEXT: vmov r5, s14
|
||||
; CHECK-NEXT: vmov r4, s15
|
||||
; CHECK-NEXT: vmov r2, s4
|
||||
; CHECK-NEXT: subs r7, r5, r7
|
||||
; CHECK-NEXT: vmov r7, s12
|
||||
; CHECK-NEXT: sbcs r4, r6
|
||||
; CHECK-NEXT: vmov r6, s13
|
||||
; CHECK-NEXT: mov.w r4, #0
|
||||
; CHECK-NEXT: vmov r3, r2, d2
|
||||
; CHECK-NEXT: vmov r4, r5, d7
|
||||
; CHECK-NEXT: subs r6, r4, r6
|
||||
; CHECK-NEXT: eor.w r0, r0, r4
|
||||
; CHECK-NEXT: sbcs r5, r7
|
||||
; CHECK-NEXT: vmov r6, r7, d6
|
||||
; CHECK-NEXT: mov.w r5, #0
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r4, #1
|
||||
; CHECK-NEXT: cmp r4, #0
|
||||
; CHECK-NEXT: csetm r4, ne
|
||||
; CHECK-NEXT: subs r2, r7, r2
|
||||
; CHECK-NEXT: sbcs.w r0, r6, r0
|
||||
; CHECK-NEXT: mov.w r0, #0
|
||||
; CHECK-NEXT: movlo r5, #1
|
||||
; CHECK-NEXT: cmp r5, #0
|
||||
; CHECK-NEXT: csetm r5, ne
|
||||
; CHECK-NEXT: subs r3, r6, r3
|
||||
; CHECK-NEXT: sbcs.w r2, r7, r2
|
||||
; CHECK-NEXT: mov.w r2, #0
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r0, #1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r0, r4
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r0, r4
|
||||
; CHECK-NEXT: eor.w r0, r5, r3
|
||||
; CHECK-NEXT: orrs.w r0, r0, r12
|
||||
; CHECK-NEXT: movlo r2, #1
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: csetm r2, ne
|
||||
; CHECK-NEXT: orrs.w r0, r0, r8
|
||||
; CHECK-NEXT: cset r0, ne
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r2, r5
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r2, r5
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: teq.w r7, r9
|
||||
; CHECK-NEXT: teq.w r6, r9
|
||||
; CHECK-NEXT: cset r2, ne
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: csetm r2, ne
|
||||
|
|
|
@ -40,19 +40,17 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @abs_v2i64(<2 x i64> %s1) {
|
||||
; CHECK-LABEL: abs_v2i64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: adds.w r1, r1, r0, asr #31
|
||||
; CHECK-NEXT: adc.w r12, r0, r0, asr #31
|
||||
; CHECK-NEXT: eor.w r1, r1, r0, asr #31
|
||||
; CHECK-NEXT: adds.w r2, r2, r3, asr #31
|
||||
; CHECK-NEXT: eor.w r0, r12, r0, asr #31
|
||||
; CHECK-NEXT: eor.w r2, r2, r3, asr #31
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, r1
|
||||
; CHECK-NEXT: adc.w r1, r3, r3, asr #31
|
||||
; CHECK-NEXT: eor.w r1, r1, r3, asr #31
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: vmov r3, r2, d0
|
||||
; CHECK-NEXT: adds.w r0, r0, r1, asr #31
|
||||
; CHECK-NEXT: adc.w r12, r1, r1, asr #31
|
||||
; CHECK-NEXT: adds.w r3, r3, r2, asr #31
|
||||
; CHECK-NEXT: eor.w r0, r0, r1, asr #31
|
||||
; CHECK-NEXT: eor.w r3, r3, r2, asr #31
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r3, r0
|
||||
; CHECK-NEXT: eor.w r0, r12, r1, asr #31
|
||||
; CHECK-NEXT: adc.w r1, r2, r2, asr #31
|
||||
; CHECK-NEXT: eor.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
|
|
|
@ -4,26 +4,24 @@
|
|||
define arm_aapcs_vfpcc <2 x i64> @ctlz_2i64_0_t(<2 x i64> %src){
|
||||
; CHECK-LABEL: ctlz_2i64_0_t:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: clz r2, r2
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: add.w r2, r2, #32
|
||||
; CHECK-NEXT: cset r1, ne
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: clz r0, r0
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: cset r2, ne
|
||||
; CHECK-NEXT: adds r0, #32
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: clzne r2, r0
|
||||
; CHECK-NEXT: vmov s6, r2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: clz r2, r2
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: add.w r2, r2, #32
|
||||
; CHECK-NEXT: cset r1, ne
|
||||
; CHECK-NEXT: clzne r0, r1
|
||||
; CHECK-NEXT: vmov s6, r0
|
||||
; CHECK-NEXT: vmov r0, r1, d0
|
||||
; CHECK-NEXT: clz r0, r0
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: cset r2, ne
|
||||
; CHECK-NEXT: adds r0, #32
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: clzne r2, r0
|
||||
; CHECK-NEXT: vmov s4, r2
|
||||
; CHECK-NEXT: clzne r0, r1
|
||||
; CHECK-NEXT: vmov s4, r0
|
||||
; CHECK-NEXT: vldr s5, .LCPI0_0
|
||||
; CHECK-NEXT: vmov.f32 s7, s5
|
||||
; CHECK-NEXT: vmov q0, q1
|
||||
|
@ -70,26 +68,24 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @ctlz_2i64_1_t(<2 x i64> %src){
|
||||
; CHECK-LABEL: ctlz_2i64_1_t:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: clz r2, r2
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: add.w r2, r2, #32
|
||||
; CHECK-NEXT: cset r1, ne
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: clz r0, r0
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: cset r2, ne
|
||||
; CHECK-NEXT: adds r0, #32
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: clzne r2, r0
|
||||
; CHECK-NEXT: vmov s6, r2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: clz r2, r2
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: add.w r2, r2, #32
|
||||
; CHECK-NEXT: cset r1, ne
|
||||
; CHECK-NEXT: clzne r0, r1
|
||||
; CHECK-NEXT: vmov s6, r0
|
||||
; CHECK-NEXT: vmov r0, r1, d0
|
||||
; CHECK-NEXT: clz r0, r0
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: cset r2, ne
|
||||
; CHECK-NEXT: adds r0, #32
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: clzne r2, r0
|
||||
; CHECK-NEXT: vmov s4, r2
|
||||
; CHECK-NEXT: clzne r0, r1
|
||||
; CHECK-NEXT: vmov s4, r0
|
||||
; CHECK-NEXT: vldr s5, .LCPI4_0
|
||||
; CHECK-NEXT: vmov.f32 s7, s5
|
||||
; CHECK-NEXT: vmov q0, q1
|
||||
|
|
|
@ -5,56 +5,54 @@
|
|||
define arm_aapcs_vfpcc <2 x i64> @ctpop_2i64_t(<2 x i64> %src){
|
||||
; CHECK-LABEL: ctpop_2i64_t:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, lr}
|
||||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: mov.w r1, #1431655765
|
||||
; CHECK-NEXT: mov.w lr, #858993459
|
||||
; CHECK-NEXT: mov.w r4, #16843009
|
||||
; CHECK-NEXT: and.w r2, r1, r0, lsr #1
|
||||
; CHECK-NEXT: subs r0, r0, r2
|
||||
; CHECK-NEXT: and.w r3, lr, r0, lsr #2
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vmov r1, r2, d1
|
||||
; CHECK-NEXT: mov.w lr, #1431655765
|
||||
; CHECK-NEXT: vmov r3, r4, d0
|
||||
; CHECK-NEXT: mov.w r12, #858993459
|
||||
; CHECK-NEXT: vldr s1, .LCPI0_0
|
||||
; CHECK-NEXT: and.w r0, lr, r2, lsr #1
|
||||
; CHECK-NEXT: subs r0, r2, r0
|
||||
; CHECK-NEXT: and.w r2, r12, r0, lsr #2
|
||||
; CHECK-NEXT: bic r0, r0, #-858993460
|
||||
; CHECK-NEXT: add r0, r3
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: add r0, r2
|
||||
; CHECK-NEXT: and.w r2, lr, r1, lsr #1
|
||||
; CHECK-NEXT: subs r1, r1, r2
|
||||
; CHECK-NEXT: add.w r0, r0, r0, lsr #4
|
||||
; CHECK-NEXT: bic r12, r0, #-252645136
|
||||
; CHECK-NEXT: and.w r0, r1, r3, lsr #1
|
||||
; CHECK-NEXT: subs r0, r3, r0
|
||||
; CHECK-NEXT: and.w r3, lr, r0, lsr #2
|
||||
; CHECK-NEXT: bic r0, r0, #-858993460
|
||||
; CHECK-NEXT: add r0, r3
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: add.w r0, r0, r0, lsr #4
|
||||
; CHECK-NEXT: bic r0, r0, #-252645136
|
||||
; CHECK-NEXT: muls r0, r4, r0
|
||||
; CHECK-NEXT: lsrs r0, r0, #24
|
||||
; CHECK-NEXT: and.w r2, r1, r3, lsr #1
|
||||
; CHECK-NEXT: and.w r2, r12, r1, lsr #2
|
||||
; CHECK-NEXT: bic r1, r1, #-858993460
|
||||
; CHECK-NEXT: add r1, r2
|
||||
; CHECK-NEXT: and.w r2, lr, r3, lsr #1
|
||||
; CHECK-NEXT: subs r2, r3, r2
|
||||
; CHECK-NEXT: and.w r3, lr, r2, lsr #2
|
||||
; CHECK-NEXT: bic r5, r0, #-252645136
|
||||
; CHECK-NEXT: add.w r1, r1, r1, lsr #4
|
||||
; CHECK-NEXT: mov.w r0, #16843009
|
||||
; CHECK-NEXT: and.w r3, r12, r2, lsr #2
|
||||
; CHECK-NEXT: bic r2, r2, #-858993460
|
||||
; CHECK-NEXT: add r2, r3
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: vldr s1, .LCPI0_0
|
||||
; CHECK-NEXT: add.w r2, r2, r2, lsr #4
|
||||
; CHECK-NEXT: bic r2, r2, #-252645136
|
||||
; CHECK-NEXT: muls r2, r4, r2
|
||||
; CHECK-NEXT: lsrs r2, r2, #24
|
||||
; CHECK-NEXT: and.w r1, r1, r3, lsr #1
|
||||
; CHECK-NEXT: subs r1, r3, r1
|
||||
; CHECK-NEXT: and.w r3, lr, r1, lsr #2
|
||||
; CHECK-NEXT: bic r1, r1, #-858993460
|
||||
; CHECK-NEXT: add r1, r3
|
||||
; CHECK-NEXT: mul r3, r12, r4
|
||||
; CHECK-NEXT: add.w r1, r1, r1, lsr #4
|
||||
; CHECK-NEXT: and.w r3, lr, r4, lsr #1
|
||||
; CHECK-NEXT: subs r3, r4, r3
|
||||
; CHECK-NEXT: bic r1, r1, #-252645136
|
||||
; CHECK-NEXT: muls r1, r4, r1
|
||||
; CHECK-NEXT: add.w r0, r0, r3, lsr #24
|
||||
; CHECK-NEXT: vmov s2, r0
|
||||
; CHECK-NEXT: add.w r0, r2, r1, lsr #24
|
||||
; CHECK-NEXT: add.w r2, r2, r2, lsr #4
|
||||
; CHECK-NEXT: muls r5, r0, r5
|
||||
; CHECK-NEXT: and.w r4, r12, r3, lsr #2
|
||||
; CHECK-NEXT: bic r3, r3, #-858993460
|
||||
; CHECK-NEXT: bic r2, r2, #-252645136
|
||||
; CHECK-NEXT: add r3, r4
|
||||
; CHECK-NEXT: muls r1, r0, r1
|
||||
; CHECK-NEXT: add.w r3, r3, r3, lsr #4
|
||||
; CHECK-NEXT: muls r2, r0, r2
|
||||
; CHECK-NEXT: bic r3, r3, #-252645136
|
||||
; CHECK-NEXT: muls r0, r3, r0
|
||||
; CHECK-NEXT: lsrs r1, r1, #24
|
||||
; CHECK-NEXT: add.w r1, r1, r5, lsr #24
|
||||
; CHECK-NEXT: lsrs r2, r2, #24
|
||||
; CHECK-NEXT: vmov s2, r1
|
||||
; CHECK-NEXT: add.w r0, r2, r0, lsr #24
|
||||
; CHECK-NEXT: vmov s0, r0
|
||||
; CHECK-NEXT: vmov.f32 s3, s1
|
||||
; CHECK-NEXT: pop {r4, pc}
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
; CHECK-NEXT: .p2align 2
|
||||
; CHECK-NEXT: @ %bb.1:
|
||||
; CHECK-NEXT: .LCPI0_0:
|
||||
|
|
|
@ -5,30 +5,28 @@ define arm_aapcs_vfpcc <2 x i64> @cttz_2i64_0_t(<2 x i64> %src){
|
|||
; CHECK-LABEL: cttz_2i64_0_t:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov q1, q0
|
||||
; CHECK-NEXT: vmov r2, s7
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: rbit r2, r2
|
||||
; CHECK-NEXT: vmov r0, r1, d3
|
||||
; CHECK-NEXT: rbit r1, r1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: clz r2, r2
|
||||
; CHECK-NEXT: cset r1, ne
|
||||
; CHECK-NEXT: adds r2, #32
|
||||
; CHECK-NEXT: clz r1, r1
|
||||
; CHECK-NEXT: cset r2, ne
|
||||
; CHECK-NEXT: adds r1, #32
|
||||
; CHECK-NEXT: rbit r0, r0
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: clzne r2, r0
|
||||
; CHECK-NEXT: vmov s2, r2
|
||||
; CHECK-NEXT: vmov r2, s5
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: rbit r2, r2
|
||||
; CHECK-NEXT: clzne r1, r0
|
||||
; CHECK-NEXT: vmov s2, r1
|
||||
; CHECK-NEXT: vmov r0, r1, d2
|
||||
; CHECK-NEXT: rbit r1, r1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: clz r2, r2
|
||||
; CHECK-NEXT: cset r1, ne
|
||||
; CHECK-NEXT: adds r2, #32
|
||||
; CHECK-NEXT: clz r1, r1
|
||||
; CHECK-NEXT: cset r2, ne
|
||||
; CHECK-NEXT: adds r1, #32
|
||||
; CHECK-NEXT: rbit r0, r0
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: clzne r2, r0
|
||||
; CHECK-NEXT: vmov s0, r2
|
||||
; CHECK-NEXT: clzne r1, r0
|
||||
; CHECK-NEXT: vmov s0, r1
|
||||
; CHECK-NEXT: vldr s1, .LCPI0_0
|
||||
; CHECK-NEXT: vmov.f32 s3, s1
|
||||
; CHECK-NEXT: bx lr
|
||||
|
@ -81,30 +79,28 @@ define arm_aapcs_vfpcc <2 x i64> @cttz_2i64_1_t(<2 x i64> %src){
|
|||
; CHECK-LABEL: cttz_2i64_1_t:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov q1, q0
|
||||
; CHECK-NEXT: vmov r2, s7
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: rbit r2, r2
|
||||
; CHECK-NEXT: vmov r0, r1, d3
|
||||
; CHECK-NEXT: rbit r1, r1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: clz r2, r2
|
||||
; CHECK-NEXT: cset r1, ne
|
||||
; CHECK-NEXT: adds r2, #32
|
||||
; CHECK-NEXT: clz r1, r1
|
||||
; CHECK-NEXT: cset r2, ne
|
||||
; CHECK-NEXT: adds r1, #32
|
||||
; CHECK-NEXT: rbit r0, r0
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: clzne r2, r0
|
||||
; CHECK-NEXT: vmov s2, r2
|
||||
; CHECK-NEXT: vmov r2, s5
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: rbit r2, r2
|
||||
; CHECK-NEXT: clzne r1, r0
|
||||
; CHECK-NEXT: vmov s2, r1
|
||||
; CHECK-NEXT: vmov r0, r1, d2
|
||||
; CHECK-NEXT: rbit r1, r1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: clz r2, r2
|
||||
; CHECK-NEXT: cset r1, ne
|
||||
; CHECK-NEXT: adds r2, #32
|
||||
; CHECK-NEXT: clz r1, r1
|
||||
; CHECK-NEXT: cset r2, ne
|
||||
; CHECK-NEXT: adds r1, #32
|
||||
; CHECK-NEXT: rbit r0, r0
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: clzne r2, r0
|
||||
; CHECK-NEXT: vmov s0, r2
|
||||
; CHECK-NEXT: clzne r1, r0
|
||||
; CHECK-NEXT: vmov s0, r1
|
||||
; CHECK-NEXT: vldr s1, .LCPI4_0
|
||||
; CHECK-NEXT: vmov.f32 s3, s1
|
||||
; CHECK-NEXT: bx lr
|
||||
|
|
|
@ -5,22 +5,19 @@
|
|||
define arm_aapcs_vfpcc <4 x i32> @udiv_i32(<4 x i32> %in1, <4 x i32> %in2) {
|
||||
; CHECK-LABEL: udiv_i32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: udiv r0, r1, r0
|
||||
; CHECK-NEXT: vmov r1, s4
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: udiv r1, r2, r1
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: udiv r0, r1, r0
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: udiv r1, r2, r1
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
|
||||
; CHECK-NEXT: vmov q0, q2
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vmov r0, r12, d3
|
||||
; CHECK-NEXT: vmov r2, lr, d1
|
||||
; CHECK-NEXT: vmov r1, r3, d2
|
||||
; CHECK-NEXT: udiv r0, r2, r0
|
||||
; CHECK-NEXT: vmov r4, r5, d0
|
||||
; CHECK-NEXT: udiv r1, r4, r1
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
|
||||
; CHECK-NEXT: udiv r0, lr, r12
|
||||
; CHECK-NEXT: udiv r1, r5, r3
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
entry:
|
||||
%out = udiv <4 x i32> %in1, %in2
|
||||
ret <4 x i32> %out
|
||||
|
@ -29,22 +26,19 @@ entry:
|
|||
define arm_aapcs_vfpcc <4 x i32> @sdiv_i32(<4 x i32> %in1, <4 x i32> %in2) {
|
||||
; CHECK-LABEL: sdiv_i32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: sdiv r0, r1, r0
|
||||
; CHECK-NEXT: vmov r1, s4
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: sdiv r1, r2, r1
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: sdiv r0, r1, r0
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: sdiv r1, r2, r1
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
|
||||
; CHECK-NEXT: vmov q0, q2
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vmov r0, r12, d3
|
||||
; CHECK-NEXT: vmov r2, lr, d1
|
||||
; CHECK-NEXT: vmov r1, r3, d2
|
||||
; CHECK-NEXT: sdiv r0, r2, r0
|
||||
; CHECK-NEXT: vmov r4, r5, d0
|
||||
; CHECK-NEXT: sdiv r1, r4, r1
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
|
||||
; CHECK-NEXT: sdiv r0, lr, r12
|
||||
; CHECK-NEXT: sdiv r1, r5, r3
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
entry:
|
||||
%out = sdiv <4 x i32> %in1, %in2
|
||||
ret <4 x i32> %out
|
||||
|
@ -53,27 +47,23 @@ entry:
|
|||
define arm_aapcs_vfpcc <4 x i32> @urem_i32(<4 x i32> %in1, <4 x i32> %in2) {
|
||||
; CHECK-LABEL: urem_i32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: udiv r2, r1, r0
|
||||
; CHECK-NEXT: mls r12, r2, r0, r1
|
||||
; CHECK-NEXT: vmov r1, s4
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: udiv r3, r2, r1
|
||||
; CHECK-NEXT: mls lr, r3, r1, r2
|
||||
; CHECK-NEXT: vmov r2, s7
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: udiv r0, r3, r2
|
||||
; CHECK-NEXT: mls r0, r0, r2, r3
|
||||
; CHECK-NEXT: vmov r2, s5
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], lr, r12
|
||||
; CHECK-NEXT: udiv r1, r3, r2
|
||||
; CHECK-NEXT: mls r1, r1, r2, r3
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vmov r0, r12, d3
|
||||
; CHECK-NEXT: vmov r2, r3, d1
|
||||
; CHECK-NEXT: vmov r1, lr, d2
|
||||
; CHECK-NEXT: udiv r4, r2, r0
|
||||
; CHECK-NEXT: mls r0, r4, r0, r2
|
||||
; CHECK-NEXT: vmov r2, r4, d0
|
||||
; CHECK-NEXT: udiv r5, r2, r1
|
||||
; CHECK-NEXT: mls r1, r5, r1, r2
|
||||
; CHECK-NEXT: udiv r2, r3, r12
|
||||
; CHECK-NEXT: mls r2, r2, r12, r3
|
||||
; CHECK-NEXT: udiv r3, r4, lr
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
|
||||
; CHECK-NEXT: mls r3, r3, lr, r4
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
entry:
|
||||
%out = urem <4 x i32> %in1, %in2
|
||||
ret <4 x i32> %out
|
||||
|
@ -82,27 +72,23 @@ entry:
|
|||
define arm_aapcs_vfpcc <4 x i32> @srem_i32(<4 x i32> %in1, <4 x i32> %in2) {
|
||||
; CHECK-LABEL: srem_i32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: sdiv r2, r1, r0
|
||||
; CHECK-NEXT: mls r12, r2, r0, r1
|
||||
; CHECK-NEXT: vmov r1, s4
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: sdiv r3, r2, r1
|
||||
; CHECK-NEXT: mls lr, r3, r1, r2
|
||||
; CHECK-NEXT: vmov r2, s7
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: sdiv r0, r3, r2
|
||||
; CHECK-NEXT: mls r0, r0, r2, r3
|
||||
; CHECK-NEXT: vmov r2, s5
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], lr, r12
|
||||
; CHECK-NEXT: sdiv r1, r3, r2
|
||||
; CHECK-NEXT: mls r1, r1, r2, r3
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vmov r0, r12, d3
|
||||
; CHECK-NEXT: vmov r2, r3, d1
|
||||
; CHECK-NEXT: vmov r1, lr, d2
|
||||
; CHECK-NEXT: sdiv r4, r2, r0
|
||||
; CHECK-NEXT: mls r0, r4, r0, r2
|
||||
; CHECK-NEXT: vmov r2, r4, d0
|
||||
; CHECK-NEXT: sdiv r5, r2, r1
|
||||
; CHECK-NEXT: mls r1, r5, r1, r2
|
||||
; CHECK-NEXT: sdiv r2, r3, r12
|
||||
; CHECK-NEXT: mls r2, r2, r12, r3
|
||||
; CHECK-NEXT: sdiv r3, r4, lr
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
|
||||
; CHECK-NEXT: mls r3, r3, lr, r4
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
entry:
|
||||
%out = srem <4 x i32> %in1, %in2
|
||||
ret <4 x i32> %out
|
||||
|
@ -637,17 +623,13 @@ define arm_aapcs_vfpcc <2 x i64> @udiv_i64(<2 x i64> %in1, <2 x i64> %in2) {
|
|||
; CHECK-NEXT: vpush {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vmov q4, q1
|
||||
; CHECK-NEXT: vmov q5, q0
|
||||
; CHECK-NEXT: vmov r0, s22
|
||||
; CHECK-NEXT: vmov r1, s23
|
||||
; CHECK-NEXT: vmov r2, s18
|
||||
; CHECK-NEXT: vmov r3, s19
|
||||
; CHECK-NEXT: vmov r0, r1, d11
|
||||
; CHECK-NEXT: vmov r2, r3, d9
|
||||
; CHECK-NEXT: bl __aeabi_uldivmod
|
||||
; CHECK-NEXT: mov r4, r0
|
||||
; CHECK-NEXT: mov r5, r1
|
||||
; CHECK-NEXT: vmov r0, s20
|
||||
; CHECK-NEXT: vmov r1, s21
|
||||
; CHECK-NEXT: vmov r2, s16
|
||||
; CHECK-NEXT: vmov r3, s17
|
||||
; CHECK-NEXT: vmov r0, r1, d10
|
||||
; CHECK-NEXT: vmov r2, r3, d8
|
||||
; CHECK-NEXT: bl __aeabi_uldivmod
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, r4
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r5
|
||||
|
@ -667,17 +649,13 @@ define arm_aapcs_vfpcc <2 x i64> @sdiv_i64(<2 x i64> %in1, <2 x i64> %in2) {
|
|||
; CHECK-NEXT: vpush {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vmov q4, q1
|
||||
; CHECK-NEXT: vmov q5, q0
|
||||
; CHECK-NEXT: vmov r0, s22
|
||||
; CHECK-NEXT: vmov r1, s23
|
||||
; CHECK-NEXT: vmov r2, s18
|
||||
; CHECK-NEXT: vmov r3, s19
|
||||
; CHECK-NEXT: vmov r0, r1, d11
|
||||
; CHECK-NEXT: vmov r2, r3, d9
|
||||
; CHECK-NEXT: bl __aeabi_ldivmod
|
||||
; CHECK-NEXT: mov r4, r0
|
||||
; CHECK-NEXT: mov r5, r1
|
||||
; CHECK-NEXT: vmov r0, s20
|
||||
; CHECK-NEXT: vmov r1, s21
|
||||
; CHECK-NEXT: vmov r2, s16
|
||||
; CHECK-NEXT: vmov r3, s17
|
||||
; CHECK-NEXT: vmov r0, r1, d10
|
||||
; CHECK-NEXT: vmov r2, r3, d8
|
||||
; CHECK-NEXT: bl __aeabi_ldivmod
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, r4
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r5
|
||||
|
@ -697,17 +675,13 @@ define arm_aapcs_vfpcc <2 x i64> @urem_i64(<2 x i64> %in1, <2 x i64> %in2) {
|
|||
; CHECK-NEXT: vpush {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vmov q4, q1
|
||||
; CHECK-NEXT: vmov q5, q0
|
||||
; CHECK-NEXT: vmov r0, s22
|
||||
; CHECK-NEXT: vmov r1, s23
|
||||
; CHECK-NEXT: vmov r2, s18
|
||||
; CHECK-NEXT: vmov r3, s19
|
||||
; CHECK-NEXT: vmov r0, r1, d11
|
||||
; CHECK-NEXT: vmov r2, r3, d9
|
||||
; CHECK-NEXT: bl __aeabi_uldivmod
|
||||
; CHECK-NEXT: mov r4, r2
|
||||
; CHECK-NEXT: mov r5, r3
|
||||
; CHECK-NEXT: vmov r0, s20
|
||||
; CHECK-NEXT: vmov r1, s21
|
||||
; CHECK-NEXT: vmov r2, s16
|
||||
; CHECK-NEXT: vmov r3, s17
|
||||
; CHECK-NEXT: vmov r0, r1, d10
|
||||
; CHECK-NEXT: vmov r2, r3, d8
|
||||
; CHECK-NEXT: bl __aeabi_uldivmod
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, r4
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r5
|
||||
|
@ -727,17 +701,13 @@ define arm_aapcs_vfpcc <2 x i64> @srem_i64(<2 x i64> %in1, <2 x i64> %in2) {
|
|||
; CHECK-NEXT: vpush {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vmov q4, q1
|
||||
; CHECK-NEXT: vmov q5, q0
|
||||
; CHECK-NEXT: vmov r0, s22
|
||||
; CHECK-NEXT: vmov r1, s23
|
||||
; CHECK-NEXT: vmov r2, s18
|
||||
; CHECK-NEXT: vmov r3, s19
|
||||
; CHECK-NEXT: vmov r0, r1, d11
|
||||
; CHECK-NEXT: vmov r2, r3, d9
|
||||
; CHECK-NEXT: bl __aeabi_ldivmod
|
||||
; CHECK-NEXT: mov r4, r2
|
||||
; CHECK-NEXT: mov r5, r3
|
||||
; CHECK-NEXT: vmov r0, s20
|
||||
; CHECK-NEXT: vmov r1, s21
|
||||
; CHECK-NEXT: vmov r2, s16
|
||||
; CHECK-NEXT: vmov r3, s17
|
||||
; CHECK-NEXT: vmov r0, r1, d10
|
||||
; CHECK-NEXT: vmov r2, r3, d8
|
||||
; CHECK-NEXT: bl __aeabi_ldivmod
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, r4
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r5
|
||||
|
@ -774,24 +744,22 @@ define arm_aapcs_vfpcc <4 x float> @frem_f32(<4 x float> %in1, <4 x float> %in2)
|
|||
; CHECK-NEXT: vpush {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vmov q4, q1
|
||||
; CHECK-NEXT: vmov q5, q0
|
||||
; CHECK-NEXT: vmov r0, s22
|
||||
; CHECK-NEXT: vmov r1, s18
|
||||
; CHECK-NEXT: vmov r0, r4, d11
|
||||
; CHECK-NEXT: vmov r1, r5, d9
|
||||
; CHECK-NEXT: bl fmodf
|
||||
; CHECK-NEXT: mov r4, r0
|
||||
; CHECK-NEXT: vmov r0, s23
|
||||
; CHECK-NEXT: vmov r1, s19
|
||||
; CHECK-NEXT: mov r6, r0
|
||||
; CHECK-NEXT: mov r0, r4
|
||||
; CHECK-NEXT: mov r1, r5
|
||||
; CHECK-NEXT: bl fmodf
|
||||
; CHECK-NEXT: vmov r2, s21
|
||||
; CHECK-NEXT: vmov r1, s17
|
||||
; CHECK-NEXT: vmov r6, s16
|
||||
; CHECK-NEXT: vmov r4, r2, d10
|
||||
; CHECK-NEXT: vmov r5, r1, d8
|
||||
; CHECK-NEXT: vmov s19, r0
|
||||
; CHECK-NEXT: vmov r5, s20
|
||||
; CHECK-NEXT: vmov s18, r4
|
||||
; CHECK-NEXT: vmov s18, r6
|
||||
; CHECK-NEXT: mov r0, r2
|
||||
; CHECK-NEXT: bl fmodf
|
||||
; CHECK-NEXT: vmov s17, r0
|
||||
; CHECK-NEXT: mov r0, r5
|
||||
; CHECK-NEXT: mov r1, r6
|
||||
; CHECK-NEXT: mov r0, r4
|
||||
; CHECK-NEXT: mov r1, r5
|
||||
; CHECK-NEXT: bl fmodf
|
||||
; CHECK-NEXT: vmov s16, r0
|
||||
; CHECK-NEXT: vmov q0, q4
|
||||
|
|
|
@ -74,19 +74,18 @@ define arm_aapcs_vfpcc <4 x float> @cos_float32_t(<4 x float> %src) {
|
|||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vmov q4, q0
|
||||
; CHECK-NEXT: vmov r0, s18
|
||||
; CHECK-NEXT: vmov r0, r4, d9
|
||||
; CHECK-NEXT: bl cosf
|
||||
; CHECK-NEXT: mov r4, r0
|
||||
; CHECK-NEXT: vmov r0, s19
|
||||
; CHECK-NEXT: mov r5, r0
|
||||
; CHECK-NEXT: mov r0, r4
|
||||
; CHECK-NEXT: bl cosf
|
||||
; CHECK-NEXT: vmov r1, s17
|
||||
; CHECK-NEXT: vmov r5, s16
|
||||
; CHECK-NEXT: vmov r4, r1, d8
|
||||
; CHECK-NEXT: vmov s19, r0
|
||||
; CHECK-NEXT: vmov s18, r4
|
||||
; CHECK-NEXT: vmov s18, r5
|
||||
; CHECK-NEXT: mov r0, r1
|
||||
; CHECK-NEXT: bl cosf
|
||||
; CHECK-NEXT: vmov s17, r0
|
||||
; CHECK-NEXT: mov r0, r5
|
||||
; CHECK-NEXT: mov r0, r4
|
||||
; CHECK-NEXT: bl cosf
|
||||
; CHECK-NEXT: vmov s16, r0
|
||||
; CHECK-NEXT: vmov q0, q4
|
||||
|
@ -186,19 +185,18 @@ define arm_aapcs_vfpcc <4 x float> @sin_float32_t(<4 x float> %src) {
|
|||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vmov q4, q0
|
||||
; CHECK-NEXT: vmov r0, s18
|
||||
; CHECK-NEXT: vmov r0, r4, d9
|
||||
; CHECK-NEXT: bl sinf
|
||||
; CHECK-NEXT: mov r4, r0
|
||||
; CHECK-NEXT: vmov r0, s19
|
||||
; CHECK-NEXT: mov r5, r0
|
||||
; CHECK-NEXT: mov r0, r4
|
||||
; CHECK-NEXT: bl sinf
|
||||
; CHECK-NEXT: vmov r1, s17
|
||||
; CHECK-NEXT: vmov r5, s16
|
||||
; CHECK-NEXT: vmov r4, r1, d8
|
||||
; CHECK-NEXT: vmov s19, r0
|
||||
; CHECK-NEXT: vmov s18, r4
|
||||
; CHECK-NEXT: vmov s18, r5
|
||||
; CHECK-NEXT: mov r0, r1
|
||||
; CHECK-NEXT: bl sinf
|
||||
; CHECK-NEXT: vmov s17, r0
|
||||
; CHECK-NEXT: mov r0, r5
|
||||
; CHECK-NEXT: mov r0, r4
|
||||
; CHECK-NEXT: bl sinf
|
||||
; CHECK-NEXT: vmov s16, r0
|
||||
; CHECK-NEXT: vmov q0, q4
|
||||
|
@ -298,19 +296,18 @@ define arm_aapcs_vfpcc <4 x float> @exp_float32_t(<4 x float> %src) {
|
|||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vmov q4, q0
|
||||
; CHECK-NEXT: vmov r0, s18
|
||||
; CHECK-NEXT: vmov r0, r4, d9
|
||||
; CHECK-NEXT: bl expf
|
||||
; CHECK-NEXT: mov r4, r0
|
||||
; CHECK-NEXT: vmov r0, s19
|
||||
; CHECK-NEXT: mov r5, r0
|
||||
; CHECK-NEXT: mov r0, r4
|
||||
; CHECK-NEXT: bl expf
|
||||
; CHECK-NEXT: vmov r1, s17
|
||||
; CHECK-NEXT: vmov r5, s16
|
||||
; CHECK-NEXT: vmov r4, r1, d8
|
||||
; CHECK-NEXT: vmov s19, r0
|
||||
; CHECK-NEXT: vmov s18, r4
|
||||
; CHECK-NEXT: vmov s18, r5
|
||||
; CHECK-NEXT: mov r0, r1
|
||||
; CHECK-NEXT: bl expf
|
||||
; CHECK-NEXT: vmov s17, r0
|
||||
; CHECK-NEXT: mov r0, r5
|
||||
; CHECK-NEXT: mov r0, r4
|
||||
; CHECK-NEXT: bl expf
|
||||
; CHECK-NEXT: vmov s16, r0
|
||||
; CHECK-NEXT: vmov q0, q4
|
||||
|
@ -410,19 +407,18 @@ define arm_aapcs_vfpcc <4 x float> @exp2_float32_t(<4 x float> %src) {
|
|||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vmov q4, q0
|
||||
; CHECK-NEXT: vmov r0, s18
|
||||
; CHECK-NEXT: vmov r0, r4, d9
|
||||
; CHECK-NEXT: bl exp2f
|
||||
; CHECK-NEXT: mov r4, r0
|
||||
; CHECK-NEXT: vmov r0, s19
|
||||
; CHECK-NEXT: mov r5, r0
|
||||
; CHECK-NEXT: mov r0, r4
|
||||
; CHECK-NEXT: bl exp2f
|
||||
; CHECK-NEXT: vmov r1, s17
|
||||
; CHECK-NEXT: vmov r5, s16
|
||||
; CHECK-NEXT: vmov r4, r1, d8
|
||||
; CHECK-NEXT: vmov s19, r0
|
||||
; CHECK-NEXT: vmov s18, r4
|
||||
; CHECK-NEXT: vmov s18, r5
|
||||
; CHECK-NEXT: mov r0, r1
|
||||
; CHECK-NEXT: bl exp2f
|
||||
; CHECK-NEXT: vmov s17, r0
|
||||
; CHECK-NEXT: mov r0, r5
|
||||
; CHECK-NEXT: mov r0, r4
|
||||
; CHECK-NEXT: bl exp2f
|
||||
; CHECK-NEXT: vmov s16, r0
|
||||
; CHECK-NEXT: vmov q0, q4
|
||||
|
@ -522,19 +518,18 @@ define arm_aapcs_vfpcc <4 x float> @log_float32_t(<4 x float> %src) {
|
|||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vmov q4, q0
|
||||
; CHECK-NEXT: vmov r0, s18
|
||||
; CHECK-NEXT: vmov r0, r4, d9
|
||||
; CHECK-NEXT: bl logf
|
||||
; CHECK-NEXT: mov r4, r0
|
||||
; CHECK-NEXT: vmov r0, s19
|
||||
; CHECK-NEXT: mov r5, r0
|
||||
; CHECK-NEXT: mov r0, r4
|
||||
; CHECK-NEXT: bl logf
|
||||
; CHECK-NEXT: vmov r1, s17
|
||||
; CHECK-NEXT: vmov r5, s16
|
||||
; CHECK-NEXT: vmov r4, r1, d8
|
||||
; CHECK-NEXT: vmov s19, r0
|
||||
; CHECK-NEXT: vmov s18, r4
|
||||
; CHECK-NEXT: vmov s18, r5
|
||||
; CHECK-NEXT: mov r0, r1
|
||||
; CHECK-NEXT: bl logf
|
||||
; CHECK-NEXT: vmov s17, r0
|
||||
; CHECK-NEXT: mov r0, r5
|
||||
; CHECK-NEXT: mov r0, r4
|
||||
; CHECK-NEXT: bl logf
|
||||
; CHECK-NEXT: vmov s16, r0
|
||||
; CHECK-NEXT: vmov q0, q4
|
||||
|
@ -634,19 +629,18 @@ define arm_aapcs_vfpcc <4 x float> @log2_float32_t(<4 x float> %src) {
|
|||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vmov q4, q0
|
||||
; CHECK-NEXT: vmov r0, s18
|
||||
; CHECK-NEXT: vmov r0, r4, d9
|
||||
; CHECK-NEXT: bl log2f
|
||||
; CHECK-NEXT: mov r4, r0
|
||||
; CHECK-NEXT: vmov r0, s19
|
||||
; CHECK-NEXT: mov r5, r0
|
||||
; CHECK-NEXT: mov r0, r4
|
||||
; CHECK-NEXT: bl log2f
|
||||
; CHECK-NEXT: vmov r1, s17
|
||||
; CHECK-NEXT: vmov r5, s16
|
||||
; CHECK-NEXT: vmov r4, r1, d8
|
||||
; CHECK-NEXT: vmov s19, r0
|
||||
; CHECK-NEXT: vmov s18, r4
|
||||
; CHECK-NEXT: vmov s18, r5
|
||||
; CHECK-NEXT: mov r0, r1
|
||||
; CHECK-NEXT: bl log2f
|
||||
; CHECK-NEXT: vmov s17, r0
|
||||
; CHECK-NEXT: mov r0, r5
|
||||
; CHECK-NEXT: mov r0, r4
|
||||
; CHECK-NEXT: bl log2f
|
||||
; CHECK-NEXT: vmov s16, r0
|
||||
; CHECK-NEXT: vmov q0, q4
|
||||
|
@ -746,19 +740,18 @@ define arm_aapcs_vfpcc <4 x float> @log10_float32_t(<4 x float> %src) {
|
|||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vmov q4, q0
|
||||
; CHECK-NEXT: vmov r0, s18
|
||||
; CHECK-NEXT: vmov r0, r4, d9
|
||||
; CHECK-NEXT: bl log10f
|
||||
; CHECK-NEXT: mov r4, r0
|
||||
; CHECK-NEXT: vmov r0, s19
|
||||
; CHECK-NEXT: mov r5, r0
|
||||
; CHECK-NEXT: mov r0, r4
|
||||
; CHECK-NEXT: bl log10f
|
||||
; CHECK-NEXT: vmov r1, s17
|
||||
; CHECK-NEXT: vmov r5, s16
|
||||
; CHECK-NEXT: vmov r4, r1, d8
|
||||
; CHECK-NEXT: vmov s19, r0
|
||||
; CHECK-NEXT: vmov s18, r4
|
||||
; CHECK-NEXT: vmov s18, r5
|
||||
; CHECK-NEXT: mov r0, r1
|
||||
; CHECK-NEXT: bl log10f
|
||||
; CHECK-NEXT: vmov s17, r0
|
||||
; CHECK-NEXT: mov r0, r5
|
||||
; CHECK-NEXT: mov r0, r4
|
||||
; CHECK-NEXT: bl log10f
|
||||
; CHECK-NEXT: vmov s16, r0
|
||||
; CHECK-NEXT: vmov q0, q4
|
||||
|
@ -859,24 +852,22 @@ define arm_aapcs_vfpcc <4 x float> @pow_float32_t(<4 x float> %src1, <4 x float>
|
|||
; CHECK-NEXT: vpush {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vmov q4, q1
|
||||
; CHECK-NEXT: vmov q5, q0
|
||||
; CHECK-NEXT: vmov r0, s22
|
||||
; CHECK-NEXT: vmov r1, s18
|
||||
; CHECK-NEXT: vmov r0, r4, d11
|
||||
; CHECK-NEXT: vmov r1, r5, d9
|
||||
; CHECK-NEXT: bl powf
|
||||
; CHECK-NEXT: mov r4, r0
|
||||
; CHECK-NEXT: vmov r0, s23
|
||||
; CHECK-NEXT: vmov r1, s19
|
||||
; CHECK-NEXT: mov r6, r0
|
||||
; CHECK-NEXT: mov r0, r4
|
||||
; CHECK-NEXT: mov r1, r5
|
||||
; CHECK-NEXT: bl powf
|
||||
; CHECK-NEXT: vmov r2, s21
|
||||
; CHECK-NEXT: vmov r1, s17
|
||||
; CHECK-NEXT: vmov r6, s16
|
||||
; CHECK-NEXT: vmov r4, r2, d10
|
||||
; CHECK-NEXT: vmov r5, r1, d8
|
||||
; CHECK-NEXT: vmov s19, r0
|
||||
; CHECK-NEXT: vmov r5, s20
|
||||
; CHECK-NEXT: vmov s18, r4
|
||||
; CHECK-NEXT: vmov s18, r6
|
||||
; CHECK-NEXT: mov r0, r2
|
||||
; CHECK-NEXT: bl powf
|
||||
; CHECK-NEXT: vmov s17, r0
|
||||
; CHECK-NEXT: mov r0, r5
|
||||
; CHECK-NEXT: mov r1, r6
|
||||
; CHECK-NEXT: mov r0, r4
|
||||
; CHECK-NEXT: mov r1, r5
|
||||
; CHECK-NEXT: bl powf
|
||||
; CHECK-NEXT: vmov s16, r0
|
||||
; CHECK-NEXT: vmov q0, q4
|
||||
|
@ -993,26 +984,22 @@ define arm_aapcs_vfpcc <4 x float> @copysign_float32_t(<4 x float> %src1, <4 x f
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov lr, s6
|
||||
; CHECK-NEXT: vmov r12, s7
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r4, s4
|
||||
; CHECK-NEXT: vmov r5, s0
|
||||
; CHECK-NEXT: lsrs r0, r0, #31
|
||||
; CHECK-NEXT: bfi r3, r0, #31, #1
|
||||
; CHECK-NEXT: lsr.w r0, lr, #31
|
||||
; CHECK-NEXT: bfi r2, r0, #31, #1
|
||||
; CHECK-NEXT: lsr.w r0, r12, #31
|
||||
; CHECK-NEXT: bfi r1, r0, #31, #1
|
||||
; CHECK-NEXT: vmov s3, r1
|
||||
; CHECK-NEXT: lsrs r0, r4, #31
|
||||
; CHECK-NEXT: vmov s2, r2
|
||||
; CHECK-NEXT: bfi r5, r0, #31, #1
|
||||
; CHECK-NEXT: vmov s1, r3
|
||||
; CHECK-NEXT: vmov s0, r5
|
||||
; CHECK-NEXT: vmov r12, r1, d2
|
||||
; CHECK-NEXT: vmov r2, lr, d3
|
||||
; CHECK-NEXT: vmov r3, r0, d0
|
||||
; CHECK-NEXT: vmov r4, r5, d1
|
||||
; CHECK-NEXT: lsrs r1, r1, #31
|
||||
; CHECK-NEXT: bfi r0, r1, #31, #1
|
||||
; CHECK-NEXT: lsrs r1, r2, #31
|
||||
; CHECK-NEXT: bfi r4, r1, #31, #1
|
||||
; CHECK-NEXT: lsr.w r1, lr, #31
|
||||
; CHECK-NEXT: bfi r5, r1, #31, #1
|
||||
; CHECK-NEXT: lsr.w r1, r12, #31
|
||||
; CHECK-NEXT: bfi r3, r1, #31, #1
|
||||
; CHECK-NEXT: vmov s3, r5
|
||||
; CHECK-NEXT: vmov s2, r4
|
||||
; CHECK-NEXT: vmov s1, r0
|
||||
; CHECK-NEXT: vmov s0, r3
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
entry:
|
||||
%0 = call fast <4 x float> @llvm.copysign.v4f32(<4 x float> %src1, <4 x float> %src2)
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -49,36 +49,32 @@ define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_sext(i16* %base, <8 x i16>* %offp
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vldrh.s32 q0, [r1]
|
||||
; CHECK-NEXT: vldrh.s32 q1, [r1, #8]
|
||||
; CHECK-NEXT: vldrh.s32 q0, [r1, #8]
|
||||
; CHECK-NEXT: vshl.i32 q0, q0, #1
|
||||
; CHECK-NEXT: vshl.i32 q1, q1, #1
|
||||
; CHECK-NEXT: vadd.i32 q0, q0, r0
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: vmov r5, s1
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: vmov r4, s7
|
||||
; CHECK-NEXT: ldrh.w r12, [r2]
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: ldrh.w lr, [r3]
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: ldrh r5, [r5]
|
||||
; CHECK-NEXT: ldrh r0, [r0]
|
||||
; CHECK-NEXT: ldrh r1, [r1]
|
||||
; CHECK-NEXT: ldrh r4, [r4]
|
||||
; CHECK-NEXT: vmov r2, r12, d0
|
||||
; CHECK-NEXT: vmov r3, lr, d1
|
||||
; CHECK-NEXT: vldrh.s32 q0, [r1]
|
||||
; CHECK-NEXT: vshl.i32 q0, q0, #1
|
||||
; CHECK-NEXT: vadd.i32 q0, q0, r0
|
||||
; CHECK-NEXT: vmov r4, r5, d0
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: ldrh r2, [r2]
|
||||
; CHECK-NEXT: ldrh.w r12, [r12]
|
||||
; CHECK-NEXT: ldrh r3, [r3]
|
||||
; CHECK-NEXT: vmov.16 q0[0], r2
|
||||
; CHECK-NEXT: ldrh.w lr, [lr]
|
||||
; CHECK-NEXT: ldrh r4, [r4]
|
||||
; CHECK-NEXT: ldrh r5, [r5]
|
||||
; CHECK-NEXT: vmov.16 q0[0], r4
|
||||
; CHECK-NEXT: ldrh r0, [r0]
|
||||
; CHECK-NEXT: vmov.16 q0[1], r5
|
||||
; CHECK-NEXT: vmov.16 q0[2], r12
|
||||
; CHECK-NEXT: vmov.16 q0[3], lr
|
||||
; CHECK-NEXT: vmov.16 q0[4], r0
|
||||
; CHECK-NEXT: vmov.16 q0[5], r1
|
||||
; CHECK-NEXT: ldrh r1, [r1]
|
||||
; CHECK-NEXT: vmov.16 q0[2], r0
|
||||
; CHECK-NEXT: vmov.16 q0[3], r1
|
||||
; CHECK-NEXT: vmov.16 q0[4], r2
|
||||
; CHECK-NEXT: vmov.16 q0[5], r12
|
||||
; CHECK-NEXT: vmov.16 q0[6], r3
|
||||
; CHECK-NEXT: vmov.16 q0[7], r4
|
||||
; CHECK-NEXT: vmov.16 q0[7], lr
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
entry:
|
||||
%offs = load <8 x i16>, <8 x i16>* %offptr, align 2
|
||||
|
@ -94,29 +90,25 @@ define arm_aapcs_vfpcc <8 x half> @scaled_v8f16_sext(i16* %base, <8 x i16>* %off
|
|||
; CHECK-NEXT: vldrh.s32 q0, [r1]
|
||||
; CHECK-NEXT: vshl.i32 q0, q0, #1
|
||||
; CHECK-NEXT: vadd.i32 q1, q0, r0
|
||||
; CHECK-NEXT: vmov r2, s5
|
||||
; CHECK-NEXT: vldr.16 s8, [r2]
|
||||
; CHECK-NEXT: vmov r2, s4
|
||||
; CHECK-NEXT: vmov r2, r3, d2
|
||||
; CHECK-NEXT: vldr.16 s8, [r3]
|
||||
; CHECK-NEXT: vldr.16 s0, [r2]
|
||||
; CHECK-NEXT: vmov r3, s7
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: vldr.16 s4, [r3]
|
||||
; CHECK-NEXT: vmov r2, r3, d3
|
||||
; CHECK-NEXT: vins.f16 s0, s8
|
||||
; CHECK-NEXT: vldr.16 s4, [r3]
|
||||
; CHECK-NEXT: vldr.16 s1, [r2]
|
||||
; CHECK-NEXT: vins.f16 s1, s4
|
||||
; CHECK-NEXT: vldrh.s32 q1, [r1, #8]
|
||||
; CHECK-NEXT: vshl.i32 q1, q1, #1
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: vldr.16 s2, [r0]
|
||||
; CHECK-NEXT: vmov r0, r1, d2
|
||||
; CHECK-NEXT: vldr.16 s8, [r1]
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vldr.16 s2, [r0]
|
||||
; CHECK-NEXT: vmov r0, r1, d3
|
||||
; CHECK-NEXT: vins.f16 s2, s8
|
||||
; CHECK-NEXT: vldr.16 s8, [r0]
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vldr.16 s4, [r1]
|
||||
; CHECK-NEXT: vldr.16 s3, [r0]
|
||||
; CHECK-NEXT: vins.f16 s3, s8
|
||||
; CHECK-NEXT: vins.f16 s3, s4
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%offs = load <8 x i16>, <8 x i16>* %offptr, align 2
|
||||
|
@ -259,39 +251,35 @@ define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_2gep(i16* %base, <8 x i16>* %
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vldrh.s32 q0, [r1]
|
||||
; CHECK-NEXT: vmov.i32 q1, #0x28
|
||||
; CHECK-NEXT: vldrh.s32 q2, [r1, #8]
|
||||
; CHECK-NEXT: vshl.i32 q0, q0, #1
|
||||
; CHECK-NEXT: vadd.i32 q0, q0, r0
|
||||
; CHECK-NEXT: vshl.i32 q2, q2, #1
|
||||
; CHECK-NEXT: vadd.i32 q0, q0, q1
|
||||
; CHECK-NEXT: vadd.i32 q2, q2, r0
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vadd.i32 q1, q2, q1
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: vmov r5, s1
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: vmov r4, s7
|
||||
; CHECK-NEXT: ldrh.w r12, [r2]
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: ldrh.w lr, [r3]
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: ldrh r5, [r5]
|
||||
; CHECK-NEXT: ldrh r0, [r0]
|
||||
; CHECK-NEXT: ldrh r1, [r1]
|
||||
; CHECK-NEXT: ldrh r4, [r4]
|
||||
; CHECK-NEXT: vldrh.s32 q1, [r1, #8]
|
||||
; CHECK-NEXT: vmov.i32 q0, #0x28
|
||||
; CHECK-NEXT: vshl.i32 q1, q1, #1
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, q0
|
||||
; CHECK-NEXT: vmov r2, r12, d2
|
||||
; CHECK-NEXT: vmov r3, lr, d3
|
||||
; CHECK-NEXT: vldrh.s32 q1, [r1]
|
||||
; CHECK-NEXT: vshl.i32 q1, q1, #1
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vadd.i32 q0, q1, q0
|
||||
; CHECK-NEXT: vmov r4, r5, d0
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: ldrh r2, [r2]
|
||||
; CHECK-NEXT: ldrh.w r12, [r12]
|
||||
; CHECK-NEXT: ldrh r3, [r3]
|
||||
; CHECK-NEXT: vmov.16 q0[0], r2
|
||||
; CHECK-NEXT: ldrh.w lr, [lr]
|
||||
; CHECK-NEXT: ldrh r4, [r4]
|
||||
; CHECK-NEXT: ldrh r5, [r5]
|
||||
; CHECK-NEXT: vmov.16 q0[0], r4
|
||||
; CHECK-NEXT: ldrh r0, [r0]
|
||||
; CHECK-NEXT: vmov.16 q0[1], r5
|
||||
; CHECK-NEXT: vmov.16 q0[2], r12
|
||||
; CHECK-NEXT: vmov.16 q0[3], lr
|
||||
; CHECK-NEXT: vmov.16 q0[4], r0
|
||||
; CHECK-NEXT: vmov.16 q0[5], r1
|
||||
; CHECK-NEXT: ldrh r1, [r1]
|
||||
; CHECK-NEXT: vmov.16 q0[2], r0
|
||||
; CHECK-NEXT: vmov.16 q0[3], r1
|
||||
; CHECK-NEXT: vmov.16 q0[4], r2
|
||||
; CHECK-NEXT: vmov.16 q0[5], r12
|
||||
; CHECK-NEXT: vmov.16 q0[6], r3
|
||||
; CHECK-NEXT: vmov.16 q0[7], r4
|
||||
; CHECK-NEXT: vmov.16 q0[7], lr
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
entry:
|
||||
%offs = load <8 x i16>, <8 x i16>* %offptr, align 2
|
||||
|
@ -378,51 +366,47 @@ entry:
|
|||
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_biggep3(i16* %base) {
|
||||
; CHECK-LABEL: scaled_v8i16_i16_biggep3:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: adr r1, .LCPI17_0
|
||||
; CHECK-NEXT: adr.w r12, .LCPI17_1
|
||||
; CHECK-NEXT: adr r2, .LCPI17_1
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r1]
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r12]
|
||||
; CHECK-NEXT: vadd.i32 q0, q0, r0
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r5, s1
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r4, s7
|
||||
; CHECK-NEXT: ldrh.w lr, [r1]
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: ldrh r6, [r2]
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: vmov r1, lr, d0
|
||||
; CHECK-NEXT: vmov r3, r12, d1
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r2]
|
||||
; CHECK-NEXT: vadd.i32 q0, q0, r0
|
||||
; CHECK-NEXT: vmov r4, r5, d0
|
||||
; CHECK-NEXT: vmov r0, r2, d1
|
||||
; CHECK-NEXT: ldrh r1, [r1]
|
||||
; CHECK-NEXT: ldrh.w lr, [lr]
|
||||
; CHECK-NEXT: ldrh r3, [r3]
|
||||
; CHECK-NEXT: ldrh.w r12, [r12]
|
||||
; CHECK-NEXT: ldrh r4, [r4]
|
||||
; CHECK-NEXT: ldrh r5, [r5]
|
||||
; CHECK-NEXT: vmov.16 q0[0], r3
|
||||
; CHECK-NEXT: vmov.16 q0[0], r4
|
||||
; CHECK-NEXT: ldrh r0, [r0]
|
||||
; CHECK-NEXT: vmov.16 q0[1], r5
|
||||
; CHECK-NEXT: ldrh r4, [r4]
|
||||
; CHECK-NEXT: vmov.16 q0[2], lr
|
||||
; CHECK-NEXT: vmov.16 q0[3], r6
|
||||
; CHECK-NEXT: vmov.16 q0[4], r0
|
||||
; CHECK-NEXT: ldrh r1, [r1]
|
||||
; CHECK-NEXT: ldrh r2, [r2]
|
||||
; CHECK-NEXT: vmov.16 q0[5], r1
|
||||
; CHECK-NEXT: vmov.16 q0[6], r2
|
||||
; CHECK-NEXT: vmov.16 q0[7], r4
|
||||
; CHECK-NEXT: pop {r4, r5, r6, pc}
|
||||
; CHECK-NEXT: vmov.16 q0[2], r0
|
||||
; CHECK-NEXT: vmov.16 q0[3], r2
|
||||
; CHECK-NEXT: vmov.16 q0[4], r1
|
||||
; CHECK-NEXT: vmov.16 q0[5], lr
|
||||
; CHECK-NEXT: vmov.16 q0[6], r3
|
||||
; CHECK-NEXT: vmov.16 q0[7], r12
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
; CHECK-NEXT: .p2align 4
|
||||
; CHECK-NEXT: @ %bb.1:
|
||||
; CHECK-NEXT: .LCPI17_0:
|
||||
; CHECK-NEXT: .long 131072 @ 0x20000
|
||||
; CHECK-NEXT: .long 131078 @ 0x20006
|
||||
; CHECK-NEXT: .long 131084 @ 0x2000c
|
||||
; CHECK-NEXT: .long 131090 @ 0x20012
|
||||
; CHECK-NEXT: .LCPI17_1:
|
||||
; CHECK-NEXT: .long 131096 @ 0x20018
|
||||
; CHECK-NEXT: .long 131102 @ 0x2001e
|
||||
; CHECK-NEXT: .long 131108 @ 0x20024
|
||||
; CHECK-NEXT: .long 131114 @ 0x2002a
|
||||
; CHECK-NEXT: .LCPI17_1:
|
||||
; CHECK-NEXT: .long 131072 @ 0x20000
|
||||
; CHECK-NEXT: .long 131078 @ 0x20006
|
||||
; CHECK-NEXT: .long 131084 @ 0x2000c
|
||||
; CHECK-NEXT: .long 131090 @ 0x20012
|
||||
entry:
|
||||
%ptrs = getelementptr inbounds i16, i16* %base, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
|
||||
%ptrs2 = getelementptr inbounds i16,<8 x i16*> %ptrs, i32 65536
|
||||
|
@ -433,51 +417,47 @@ entry:
|
|||
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_biggep4(i16* %base) {
|
||||
; CHECK-LABEL: scaled_v8i16_i16_biggep4:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: adr r1, .LCPI18_0
|
||||
; CHECK-NEXT: adr.w r12, .LCPI18_1
|
||||
; CHECK-NEXT: adr r2, .LCPI18_1
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r1]
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r12]
|
||||
; CHECK-NEXT: vadd.i32 q0, q0, r0
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r5, s1
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r4, s7
|
||||
; CHECK-NEXT: ldrh.w lr, [r1]
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: ldrh r6, [r2]
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: vmov r1, lr, d0
|
||||
; CHECK-NEXT: vmov r3, r12, d1
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r2]
|
||||
; CHECK-NEXT: vadd.i32 q0, q0, r0
|
||||
; CHECK-NEXT: vmov r4, r5, d0
|
||||
; CHECK-NEXT: vmov r0, r2, d1
|
||||
; CHECK-NEXT: ldrh r1, [r1]
|
||||
; CHECK-NEXT: ldrh.w lr, [lr]
|
||||
; CHECK-NEXT: ldrh r3, [r3]
|
||||
; CHECK-NEXT: ldrh.w r12, [r12]
|
||||
; CHECK-NEXT: ldrh r4, [r4]
|
||||
; CHECK-NEXT: ldrh r5, [r5]
|
||||
; CHECK-NEXT: vmov.16 q0[0], r3
|
||||
; CHECK-NEXT: vmov.16 q0[0], r4
|
||||
; CHECK-NEXT: ldrh r0, [r0]
|
||||
; CHECK-NEXT: vmov.16 q0[1], r5
|
||||
; CHECK-NEXT: ldrh r4, [r4]
|
||||
; CHECK-NEXT: vmov.16 q0[2], lr
|
||||
; CHECK-NEXT: vmov.16 q0[3], r6
|
||||
; CHECK-NEXT: vmov.16 q0[4], r0
|
||||
; CHECK-NEXT: ldrh r1, [r1]
|
||||
; CHECK-NEXT: ldrh r2, [r2]
|
||||
; CHECK-NEXT: vmov.16 q0[5], r1
|
||||
; CHECK-NEXT: vmov.16 q0[6], r2
|
||||
; CHECK-NEXT: vmov.16 q0[7], r4
|
||||
; CHECK-NEXT: pop {r4, r5, r6, pc}
|
||||
; CHECK-NEXT: vmov.16 q0[2], r0
|
||||
; CHECK-NEXT: vmov.16 q0[3], r2
|
||||
; CHECK-NEXT: vmov.16 q0[4], r1
|
||||
; CHECK-NEXT: vmov.16 q0[5], lr
|
||||
; CHECK-NEXT: vmov.16 q0[6], r3
|
||||
; CHECK-NEXT: vmov.16 q0[7], r12
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
; CHECK-NEXT: .p2align 4
|
||||
; CHECK-NEXT: @ %bb.1:
|
||||
; CHECK-NEXT: .LCPI18_0:
|
||||
; CHECK-NEXT: .long 0 @ 0x0
|
||||
; CHECK-NEXT: .long 6 @ 0x6
|
||||
; CHECK-NEXT: .long 12 @ 0xc
|
||||
; CHECK-NEXT: .long 18 @ 0x12
|
||||
; CHECK-NEXT: .LCPI18_1:
|
||||
; CHECK-NEXT: .long 24 @ 0x18
|
||||
; CHECK-NEXT: .long 131072 @ 0x20000
|
||||
; CHECK-NEXT: .long 36 @ 0x24
|
||||
; CHECK-NEXT: .long 42 @ 0x2a
|
||||
; CHECK-NEXT: .LCPI18_1:
|
||||
; CHECK-NEXT: .long 0 @ 0x0
|
||||
; CHECK-NEXT: .long 6 @ 0x6
|
||||
; CHECK-NEXT: .long 12 @ 0xc
|
||||
; CHECK-NEXT: .long 18 @ 0x12
|
||||
entry:
|
||||
%ptrs = getelementptr inbounds i16, i16* %base, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 65536, i32 18, i32 21>
|
||||
%gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
|
||||
|
@ -487,36 +467,32 @@ entry:
|
|||
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_biggep5(<8 x i16*> %base) {
|
||||
; CHECK-LABEL: scaled_v8i16_i16_biggep5:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: .save {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: vmov.i32 q2, #0x20000
|
||||
; CHECK-NEXT: vadd.i32 q0, q0, q2
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, q2
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r5, s1
|
||||
; CHECK-NEXT: vmov r3, s4
|
||||
; CHECK-NEXT: vmov r4, s7
|
||||
; CHECK-NEXT: ldrh.w r12, [r0]
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: ldrh.w lr, [r1]
|
||||
; CHECK-NEXT: vmov r1, s6
|
||||
; CHECK-NEXT: ldrh r2, [r2]
|
||||
; CHECK-NEXT: ldrh r5, [r5]
|
||||
; CHECK-NEXT: vmov.16 q0[0], r2
|
||||
; CHECK-NEXT: ldrh r3, [r3]
|
||||
; CHECK-NEXT: vmov.16 q0[1], r5
|
||||
; CHECK-NEXT: vmov r4, r5, d0
|
||||
; CHECK-NEXT: vmov r1, lr, d1
|
||||
; CHECK-NEXT: vmov r2, r3, d3
|
||||
; CHECK-NEXT: vmov r0, r12, d2
|
||||
; CHECK-NEXT: ldrh r4, [r4]
|
||||
; CHECK-NEXT: vmov.16 q0[2], r12
|
||||
; CHECK-NEXT: vmov.16 q0[3], lr
|
||||
; CHECK-NEXT: vmov.16 q0[4], r3
|
||||
; CHECK-NEXT: ldrh r0, [r0]
|
||||
; CHECK-NEXT: ldrh r5, [r5]
|
||||
; CHECK-NEXT: vmov.16 q0[0], r4
|
||||
; CHECK-NEXT: ldrh r1, [r1]
|
||||
; CHECK-NEXT: vmov.16 q0[5], r0
|
||||
; CHECK-NEXT: vmov.16 q0[6], r1
|
||||
; CHECK-NEXT: vmov.16 q0[7], r4
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
; CHECK-NEXT: vmov.16 q0[1], r5
|
||||
; CHECK-NEXT: ldrh r6, [r3]
|
||||
; CHECK-NEXT: ldrh.w r3, [lr]
|
||||
; CHECK-NEXT: vmov.16 q0[2], r1
|
||||
; CHECK-NEXT: ldrh r0, [r0]
|
||||
; CHECK-NEXT: vmov.16 q0[3], r3
|
||||
; CHECK-NEXT: ldrh.w r12, [r12]
|
||||
; CHECK-NEXT: vmov.16 q0[4], r0
|
||||
; CHECK-NEXT: ldrh r2, [r2]
|
||||
; CHECK-NEXT: vmov.16 q0[5], r12
|
||||
; CHECK-NEXT: vmov.16 q0[6], r2
|
||||
; CHECK-NEXT: vmov.16 q0[7], r6
|
||||
; CHECK-NEXT: pop {r4, r5, r6, pc}
|
||||
entry:
|
||||
%ptrs2 = getelementptr inbounds i16,<8 x i16*> %base, i32 65536
|
||||
%gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %ptrs2, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
|
||||
|
@ -526,51 +502,47 @@ entry:
|
|||
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_biggep6(i16* %base) {
|
||||
; CHECK-LABEL: scaled_v8i16_i16_biggep6:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: adr r1, .LCPI20_0
|
||||
; CHECK-NEXT: adr.w r12, .LCPI20_1
|
||||
; CHECK-NEXT: adr r2, .LCPI20_1
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r1]
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r12]
|
||||
; CHECK-NEXT: vadd.i32 q0, q0, r0
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r5, s1
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r4, s7
|
||||
; CHECK-NEXT: ldrh.w lr, [r1]
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: ldrh r6, [r2]
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: vmov r1, lr, d0
|
||||
; CHECK-NEXT: vmov r3, r12, d1
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r2]
|
||||
; CHECK-NEXT: vadd.i32 q0, q0, r0
|
||||
; CHECK-NEXT: vmov r4, r5, d0
|
||||
; CHECK-NEXT: vmov r0, r2, d1
|
||||
; CHECK-NEXT: ldrh r1, [r1]
|
||||
; CHECK-NEXT: ldrh.w lr, [lr]
|
||||
; CHECK-NEXT: ldrh r3, [r3]
|
||||
; CHECK-NEXT: ldrh.w r12, [r12]
|
||||
; CHECK-NEXT: ldrh r4, [r4]
|
||||
; CHECK-NEXT: ldrh r5, [r5]
|
||||
; CHECK-NEXT: vmov.16 q0[0], r3
|
||||
; CHECK-NEXT: vmov.16 q0[0], r4
|
||||
; CHECK-NEXT: ldrh r0, [r0]
|
||||
; CHECK-NEXT: vmov.16 q0[1], r5
|
||||
; CHECK-NEXT: ldrh r4, [r4]
|
||||
; CHECK-NEXT: vmov.16 q0[2], lr
|
||||
; CHECK-NEXT: vmov.16 q0[3], r6
|
||||
; CHECK-NEXT: vmov.16 q0[4], r0
|
||||
; CHECK-NEXT: ldrh r1, [r1]
|
||||
; CHECK-NEXT: ldrh r2, [r2]
|
||||
; CHECK-NEXT: vmov.16 q0[5], r1
|
||||
; CHECK-NEXT: vmov.16 q0[6], r2
|
||||
; CHECK-NEXT: vmov.16 q0[7], r4
|
||||
; CHECK-NEXT: pop {r4, r5, r6, pc}
|
||||
; CHECK-NEXT: vmov.16 q0[2], r0
|
||||
; CHECK-NEXT: vmov.16 q0[3], r2
|
||||
; CHECK-NEXT: vmov.16 q0[4], r1
|
||||
; CHECK-NEXT: vmov.16 q0[5], lr
|
||||
; CHECK-NEXT: vmov.16 q0[6], r3
|
||||
; CHECK-NEXT: vmov.16 q0[7], r12
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
; CHECK-NEXT: .p2align 4
|
||||
; CHECK-NEXT: @ %bb.1:
|
||||
; CHECK-NEXT: .LCPI20_0:
|
||||
; CHECK-NEXT: .long 2 @ 0x2
|
||||
; CHECK-NEXT: .long 8 @ 0x8
|
||||
; CHECK-NEXT: .long 14 @ 0xe
|
||||
; CHECK-NEXT: .long 20 @ 0x14
|
||||
; CHECK-NEXT: .LCPI20_1:
|
||||
; CHECK-NEXT: .long 131074 @ 0x20002
|
||||
; CHECK-NEXT: .long 32 @ 0x20
|
||||
; CHECK-NEXT: .long 38 @ 0x26
|
||||
; CHECK-NEXT: .long 44 @ 0x2c
|
||||
; CHECK-NEXT: .LCPI20_1:
|
||||
; CHECK-NEXT: .long 2 @ 0x2
|
||||
; CHECK-NEXT: .long 8 @ 0x8
|
||||
; CHECK-NEXT: .long 14 @ 0xe
|
||||
; CHECK-NEXT: .long 20 @ 0x14
|
||||
entry:
|
||||
%ptrs = getelementptr inbounds i16, i16* %base, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 65536, i32 15, i32 18, i32 21>
|
||||
%ptrs2 = getelementptr inbounds i16,<8 x i16*> %ptrs, i32 1
|
||||
|
@ -581,51 +553,47 @@ entry:
|
|||
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_biggep7(i16* %base, <8 x i16>* %offptr) {
|
||||
; CHECK-LABEL: scaled_v8i16_i16_biggep7:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: adr r1, .LCPI21_0
|
||||
; CHECK-NEXT: adr.w r12, .LCPI21_1
|
||||
; CHECK-NEXT: adr r2, .LCPI21_1
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r1]
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r12]
|
||||
; CHECK-NEXT: vadd.i32 q0, q0, r0
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r5, s1
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r4, s7
|
||||
; CHECK-NEXT: ldrh.w lr, [r1]
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: ldrh r6, [r2]
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: vmov r1, lr, d0
|
||||
; CHECK-NEXT: vmov r3, r12, d1
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r2]
|
||||
; CHECK-NEXT: vadd.i32 q0, q0, r0
|
||||
; CHECK-NEXT: vmov r4, r5, d0
|
||||
; CHECK-NEXT: vmov r0, r2, d1
|
||||
; CHECK-NEXT: ldrh r1, [r1]
|
||||
; CHECK-NEXT: ldrh.w lr, [lr]
|
||||
; CHECK-NEXT: ldrh r3, [r3]
|
||||
; CHECK-NEXT: ldrh.w r12, [r12]
|
||||
; CHECK-NEXT: ldrh r4, [r4]
|
||||
; CHECK-NEXT: ldrh r5, [r5]
|
||||
; CHECK-NEXT: vmov.16 q0[0], r3
|
||||
; CHECK-NEXT: vmov.16 q0[0], r4
|
||||
; CHECK-NEXT: ldrh r0, [r0]
|
||||
; CHECK-NEXT: vmov.16 q0[1], r5
|
||||
; CHECK-NEXT: ldrh r4, [r4]
|
||||
; CHECK-NEXT: vmov.16 q0[2], lr
|
||||
; CHECK-NEXT: vmov.16 q0[3], r6
|
||||
; CHECK-NEXT: vmov.16 q0[4], r0
|
||||
; CHECK-NEXT: ldrh r1, [r1]
|
||||
; CHECK-NEXT: ldrh r2, [r2]
|
||||
; CHECK-NEXT: vmov.16 q0[5], r1
|
||||
; CHECK-NEXT: vmov.16 q0[6], r2
|
||||
; CHECK-NEXT: vmov.16 q0[7], r4
|
||||
; CHECK-NEXT: pop {r4, r5, r6, pc}
|
||||
; CHECK-NEXT: vmov.16 q0[2], r0
|
||||
; CHECK-NEXT: vmov.16 q0[3], r2
|
||||
; CHECK-NEXT: vmov.16 q0[4], r1
|
||||
; CHECK-NEXT: vmov.16 q0[5], lr
|
||||
; CHECK-NEXT: vmov.16 q0[6], r3
|
||||
; CHECK-NEXT: vmov.16 q0[7], r12
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
; CHECK-NEXT: .p2align 4
|
||||
; CHECK-NEXT: @ %bb.1:
|
||||
; CHECK-NEXT: .LCPI21_0:
|
||||
; CHECK-NEXT: .long 128 @ 0x80
|
||||
; CHECK-NEXT: .long 1206 @ 0x4b6
|
||||
; CHECK-NEXT: .long 1212 @ 0x4bc
|
||||
; CHECK-NEXT: .long 1218 @ 0x4c2
|
||||
; CHECK-NEXT: .LCPI21_1:
|
||||
; CHECK-NEXT: .long 1224 @ 0x4c8
|
||||
; CHECK-NEXT: .long 1230 @ 0x4ce
|
||||
; CHECK-NEXT: .long 1236 @ 0x4d4
|
||||
; CHECK-NEXT: .long 1242 @ 0x4da
|
||||
; CHECK-NEXT: .LCPI21_1:
|
||||
; CHECK-NEXT: .long 128 @ 0x80
|
||||
; CHECK-NEXT: .long 1206 @ 0x4b6
|
||||
; CHECK-NEXT: .long 1212 @ 0x4bc
|
||||
; CHECK-NEXT: .long 1218 @ 0x4c2
|
||||
entry:
|
||||
%ptrs = getelementptr inbounds i16, i16* %base, <8 x i16> <i16 65000, i16 3, i16 6, i16 9, i16 12, i16 15, i16 18, i16 21>
|
||||
%ptrs2 = getelementptr inbounds i16,<8 x i16*> %ptrs, i16 600
|
||||
|
@ -638,36 +606,32 @@ define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_basei32(i32* %base, <8 x i16>
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vldrh.u32 q0, [r1]
|
||||
; CHECK-NEXT: vldrh.u32 q1, [r1, #8]
|
||||
; CHECK-NEXT: vldrh.u32 q0, [r1, #8]
|
||||
; CHECK-NEXT: vshl.i32 q0, q0, #2
|
||||
; CHECK-NEXT: vshl.i32 q1, q1, #2
|
||||
; CHECK-NEXT: vadd.i32 q0, q0, r0
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: vmov r5, s1
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: vmov r4, s7
|
||||
; CHECK-NEXT: ldrh.w r12, [r2]
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: ldrh.w lr, [r3]
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: ldrh r5, [r5]
|
||||
; CHECK-NEXT: ldrh r0, [r0]
|
||||
; CHECK-NEXT: ldrh r1, [r1]
|
||||
; CHECK-NEXT: ldrh r4, [r4]
|
||||
; CHECK-NEXT: vmov r2, r12, d0
|
||||
; CHECK-NEXT: vmov r3, lr, d1
|
||||
; CHECK-NEXT: vldrh.u32 q0, [r1]
|
||||
; CHECK-NEXT: vshl.i32 q0, q0, #2
|
||||
; CHECK-NEXT: vadd.i32 q0, q0, r0
|
||||
; CHECK-NEXT: vmov r4, r5, d0
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: ldrh r2, [r2]
|
||||
; CHECK-NEXT: ldrh.w r12, [r12]
|
||||
; CHECK-NEXT: ldrh r3, [r3]
|
||||
; CHECK-NEXT: vmov.16 q0[0], r2
|
||||
; CHECK-NEXT: ldrh.w lr, [lr]
|
||||
; CHECK-NEXT: ldrh r4, [r4]
|
||||
; CHECK-NEXT: ldrh r5, [r5]
|
||||
; CHECK-NEXT: vmov.16 q0[0], r4
|
||||
; CHECK-NEXT: ldrh r0, [r0]
|
||||
; CHECK-NEXT: vmov.16 q0[1], r5
|
||||
; CHECK-NEXT: vmov.16 q0[2], r12
|
||||
; CHECK-NEXT: vmov.16 q0[3], lr
|
||||
; CHECK-NEXT: vmov.16 q0[4], r0
|
||||
; CHECK-NEXT: vmov.16 q0[5], r1
|
||||
; CHECK-NEXT: ldrh r1, [r1]
|
||||
; CHECK-NEXT: vmov.16 q0[2], r0
|
||||
; CHECK-NEXT: vmov.16 q0[3], r1
|
||||
; CHECK-NEXT: vmov.16 q0[4], r2
|
||||
; CHECK-NEXT: vmov.16 q0[5], r12
|
||||
; CHECK-NEXT: vmov.16 q0[6], r3
|
||||
; CHECK-NEXT: vmov.16 q0[7], r4
|
||||
; CHECK-NEXT: vmov.16 q0[7], lr
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
entry:
|
||||
%offs = load <8 x i16>, <8 x i16>* %offptr, align 2
|
||||
|
|
|
@ -19,38 +19,34 @@ entry:
|
|||
define arm_aapcs_vfpcc <8 x i16> @zext_unscaled_i8_i16_noext(i8* %base, <8 x i8>* %offptr) {
|
||||
; CHECK-LABEL: zext_unscaled_i8_i16_noext:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vldrb.s32 q0, [r1]
|
||||
; CHECK-NEXT: vldrb.s32 q1, [r1, #4]
|
||||
; CHECK-NEXT: .save {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: vldrb.s32 q0, [r1, #4]
|
||||
; CHECK-NEXT: vadd.i32 q0, q0, r0
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: vmov r5, s0
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: vmov r4, s7
|
||||
; CHECK-NEXT: ldrb.w r12, [r2]
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: ldrb.w lr, [r3]
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: ldrb r5, [r5]
|
||||
; CHECK-NEXT: ldrb r0, [r0]
|
||||
; CHECK-NEXT: vmov.16 q0[0], r5
|
||||
; CHECK-NEXT: ldrb r1, [r1]
|
||||
; CHECK-NEXT: vmov.16 q0[1], lr
|
||||
; CHECK-NEXT: ldrb r4, [r4]
|
||||
; CHECK-NEXT: ldrb r2, [r2]
|
||||
; CHECK-NEXT: vmov r2, lr, d1
|
||||
; CHECK-NEXT: vmov r12, r3, d0
|
||||
; CHECK-NEXT: vldrb.s32 q0, [r1]
|
||||
; CHECK-NEXT: vadd.i32 q0, q0, r0
|
||||
; CHECK-NEXT: vmov r4, r5, d0
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: ldrb r6, [r2]
|
||||
; CHECK-NEXT: ldrb.w r2, [r12]
|
||||
; CHECK-NEXT: ldrb r3, [r3]
|
||||
; CHECK-NEXT: vmov.16 q0[2], r2
|
||||
; CHECK-NEXT: vmov.16 q0[3], r12
|
||||
; CHECK-NEXT: vmov.16 q0[4], r0
|
||||
; CHECK-NEXT: vmov.16 q0[5], r1
|
||||
; CHECK-NEXT: vmov.16 q0[6], r3
|
||||
; CHECK-NEXT: vmov.16 q0[7], r4
|
||||
; CHECK-NEXT: ldrb.w lr, [lr]
|
||||
; CHECK-NEXT: ldrb r4, [r4]
|
||||
; CHECK-NEXT: ldrb r5, [r5]
|
||||
; CHECK-NEXT: vmov.16 q0[0], r4
|
||||
; CHECK-NEXT: ldrb r0, [r0]
|
||||
; CHECK-NEXT: vmov.16 q0[1], r5
|
||||
; CHECK-NEXT: ldrb r1, [r1]
|
||||
; CHECK-NEXT: vmov.16 q0[2], r0
|
||||
; CHECK-NEXT: vmov.16 q0[3], r1
|
||||
; CHECK-NEXT: vmov.16 q0[4], r2
|
||||
; CHECK-NEXT: vmov.16 q0[5], r3
|
||||
; CHECK-NEXT: vmov.16 q0[6], r6
|
||||
; CHECK-NEXT: vmov.16 q0[7], lr
|
||||
; CHECK-NEXT: vmovlb.u8 q0, q0
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
; CHECK-NEXT: pop {r4, r5, r6, pc}
|
||||
entry:
|
||||
%offs = load <8 x i8>, <8 x i8>* %offptr, align 2
|
||||
%ptrs = getelementptr inbounds i8, i8* %base, <8 x i8> %offs
|
||||
|
@ -64,36 +60,32 @@ define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_sext(i16* %base, <8 x i8>* %offpt
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vldrb.s32 q0, [r1]
|
||||
; CHECK-NEXT: vldrb.s32 q1, [r1, #4]
|
||||
; CHECK-NEXT: vldrb.s32 q0, [r1, #4]
|
||||
; CHECK-NEXT: vshl.i32 q0, q0, #1
|
||||
; CHECK-NEXT: vshl.i32 q1, q1, #1
|
||||
; CHECK-NEXT: vadd.i32 q0, q0, r0
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: vmov r5, s1
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: vmov r4, s7
|
||||
; CHECK-NEXT: ldrh.w r12, [r2]
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: ldrh.w lr, [r3]
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: ldrh r5, [r5]
|
||||
; CHECK-NEXT: ldrh r0, [r0]
|
||||
; CHECK-NEXT: ldrh r1, [r1]
|
||||
; CHECK-NEXT: ldrh r4, [r4]
|
||||
; CHECK-NEXT: vmov r2, r12, d0
|
||||
; CHECK-NEXT: vmov r3, lr, d1
|
||||
; CHECK-NEXT: vldrb.s32 q0, [r1]
|
||||
; CHECK-NEXT: vshl.i32 q0, q0, #1
|
||||
; CHECK-NEXT: vadd.i32 q0, q0, r0
|
||||
; CHECK-NEXT: vmov r4, r5, d0
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: ldrh r2, [r2]
|
||||
; CHECK-NEXT: ldrh.w r12, [r12]
|
||||
; CHECK-NEXT: ldrh r3, [r3]
|
||||
; CHECK-NEXT: vmov.16 q0[0], r2
|
||||
; CHECK-NEXT: ldrh.w lr, [lr]
|
||||
; CHECK-NEXT: ldrh r4, [r4]
|
||||
; CHECK-NEXT: ldrh r5, [r5]
|
||||
; CHECK-NEXT: vmov.16 q0[0], r4
|
||||
; CHECK-NEXT: ldrh r0, [r0]
|
||||
; CHECK-NEXT: vmov.16 q0[1], r5
|
||||
; CHECK-NEXT: vmov.16 q0[2], r12
|
||||
; CHECK-NEXT: vmov.16 q0[3], lr
|
||||
; CHECK-NEXT: vmov.16 q0[4], r0
|
||||
; CHECK-NEXT: vmov.16 q0[5], r1
|
||||
; CHECK-NEXT: ldrh r1, [r1]
|
||||
; CHECK-NEXT: vmov.16 q0[2], r0
|
||||
; CHECK-NEXT: vmov.16 q0[3], r1
|
||||
; CHECK-NEXT: vmov.16 q0[4], r2
|
||||
; CHECK-NEXT: vmov.16 q0[5], r12
|
||||
; CHECK-NEXT: vmov.16 q0[6], r3
|
||||
; CHECK-NEXT: vmov.16 q0[7], r4
|
||||
; CHECK-NEXT: vmov.16 q0[7], lr
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
entry:
|
||||
%offs = load <8 x i8>, <8 x i8>* %offptr, align 2
|
||||
|
@ -108,36 +100,32 @@ define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_zext(i16* %base, <8 x i8>* %offpt
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vldrb.u32 q0, [r1]
|
||||
; CHECK-NEXT: vldrb.u32 q1, [r1, #4]
|
||||
; CHECK-NEXT: vldrb.u32 q0, [r1, #4]
|
||||
; CHECK-NEXT: vshl.i32 q0, q0, #1
|
||||
; CHECK-NEXT: vshl.i32 q1, q1, #1
|
||||
; CHECK-NEXT: vadd.i32 q0, q0, r0
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: vmov r5, s1
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: vmov r4, s7
|
||||
; CHECK-NEXT: ldrh.w r12, [r2]
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: ldrh.w lr, [r3]
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: ldrh r5, [r5]
|
||||
; CHECK-NEXT: ldrh r0, [r0]
|
||||
; CHECK-NEXT: ldrh r1, [r1]
|
||||
; CHECK-NEXT: ldrh r4, [r4]
|
||||
; CHECK-NEXT: vmov r2, r12, d0
|
||||
; CHECK-NEXT: vmov r3, lr, d1
|
||||
; CHECK-NEXT: vldrb.u32 q0, [r1]
|
||||
; CHECK-NEXT: vshl.i32 q0, q0, #1
|
||||
; CHECK-NEXT: vadd.i32 q0, q0, r0
|
||||
; CHECK-NEXT: vmov r4, r5, d0
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: ldrh r2, [r2]
|
||||
; CHECK-NEXT: ldrh.w r12, [r12]
|
||||
; CHECK-NEXT: ldrh r3, [r3]
|
||||
; CHECK-NEXT: vmov.16 q0[0], r2
|
||||
; CHECK-NEXT: ldrh.w lr, [lr]
|
||||
; CHECK-NEXT: ldrh r4, [r4]
|
||||
; CHECK-NEXT: ldrh r5, [r5]
|
||||
; CHECK-NEXT: vmov.16 q0[0], r4
|
||||
; CHECK-NEXT: ldrh r0, [r0]
|
||||
; CHECK-NEXT: vmov.16 q0[1], r5
|
||||
; CHECK-NEXT: vmov.16 q0[2], r12
|
||||
; CHECK-NEXT: vmov.16 q0[3], lr
|
||||
; CHECK-NEXT: vmov.16 q0[4], r0
|
||||
; CHECK-NEXT: vmov.16 q0[5], r1
|
||||
; CHECK-NEXT: ldrh r1, [r1]
|
||||
; CHECK-NEXT: vmov.16 q0[2], r0
|
||||
; CHECK-NEXT: vmov.16 q0[3], r1
|
||||
; CHECK-NEXT: vmov.16 q0[4], r2
|
||||
; CHECK-NEXT: vmov.16 q0[5], r12
|
||||
; CHECK-NEXT: vmov.16 q0[6], r3
|
||||
; CHECK-NEXT: vmov.16 q0[7], r4
|
||||
; CHECK-NEXT: vmov.16 q0[7], lr
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
entry:
|
||||
%offs = load <8 x i8>, <8 x i8>* %offptr, align 2
|
||||
|
|
|
@ -461,16 +461,14 @@ define arm_aapcs_vfpcc <4 x i32> @qi4(<4 x i32*> %p) {
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov.i32 q1, #0x10
|
||||
; CHECK-NEXT: vadd.i32 q0, q0, q1
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: vmov r2, r3, d0
|
||||
; CHECK-NEXT: ldr r0, [r0]
|
||||
; CHECK-NEXT: ldr r1, [r1]
|
||||
; CHECK-NEXT: ldr r2, [r2]
|
||||
; CHECK-NEXT: ldr r1, [r1]
|
||||
; CHECK-NEXT: ldr r3, [r3]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, r0
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%g = getelementptr inbounds i32, <4 x i32*> %p, i32 4
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -32,31 +32,27 @@ entry:
|
|||
define arm_aapcs_vfpcc <8 x i32> @ptr_v8i32(<8 x i32*>* %offptr) {
|
||||
; CHECK-LABEL: ptr_v8i32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0, #16]
|
||||
; CHECK-NEXT: vmov r1, r2, d1
|
||||
; CHECK-NEXT: vmov r3, r12, d0
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0, #16]
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: vmov r4, s5
|
||||
; CHECK-NEXT: vmov r5, s1
|
||||
; CHECK-NEXT: ldr.w r12, [r1]
|
||||
; CHECK-NEXT: vmov r1, s4
|
||||
; CHECK-NEXT: ldr.w lr, [r2]
|
||||
; CHECK-NEXT: vmov r2, s7
|
||||
; CHECK-NEXT: ldr r0, [r0]
|
||||
; CHECK-NEXT: vmov r0, lr, d1
|
||||
; CHECK-NEXT: ldr r7, [r2]
|
||||
; CHECK-NEXT: vmov r2, r4, d0
|
||||
; CHECK-NEXT: ldr r6, [r1]
|
||||
; CHECK-NEXT: ldr r3, [r3]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], lr, r12
|
||||
; CHECK-NEXT: ldr r4, [r4]
|
||||
; CHECK-NEXT: ldr r5, [r5]
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r5, r3
|
||||
; CHECK-NEXT: ldr r1, [r1]
|
||||
; CHECK-NEXT: ldr r0, [r0]
|
||||
; CHECK-NEXT: ldr.w r1, [r12]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r6
|
||||
; CHECK-NEXT: ldr.w r5, [lr]
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r1, r7
|
||||
; CHECK-NEXT: ldr r2, [r2]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r4, r2
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
; CHECK-NEXT: ldr r4, [r4]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, r0
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r4, r5
|
||||
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
|
||||
entry:
|
||||
%offs = load <8 x i32*>, <8 x i32*>* %offptr, align 4
|
||||
%gather = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> %offs, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
|
||||
|
@ -68,50 +64,42 @@ define arm_aapcs_vfpcc <16 x i32> @ptr_v16i32(<16 x i32*>* %offptr) {
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0, #32]
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0, #48]
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
||||
; CHECK-NEXT: vldrw.u32 q3, [r0, #16]
|
||||
; CHECK-NEXT: vmov r1, s10
|
||||
; CHECK-NEXT: vmov r5, s6
|
||||
; CHECK-NEXT: vmov r6, s4
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: vmov r7, s5
|
||||
; CHECK-NEXT: vmov r0, s14
|
||||
; CHECK-NEXT: vmov r4, s8
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0, #16]
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0, #32]
|
||||
; CHECK-NEXT: vmov r1, r2, d1
|
||||
; CHECK-NEXT: vmov r3, lr, d0
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vmov r4, r5, d1
|
||||
; CHECK-NEXT: ldr r7, [r2]
|
||||
; CHECK-NEXT: vmov r2, r6, d0
|
||||
; CHECK-NEXT: ldr.w r12, [r1]
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: ldr r3, [r3]
|
||||
; CHECK-NEXT: ldr r4, [r4]
|
||||
; CHECK-NEXT: ldr r5, [r5]
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r3, r12
|
||||
; CHECK-NEXT: ldr.w r1, [lr]
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r1, r7
|
||||
; CHECK-NEXT: ldr r2, [r2]
|
||||
; CHECK-NEXT: ldr r6, [r6]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, r4
|
||||
; CHECK-NEXT: vmov r2, r4, d3
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r6, r5
|
||||
; CHECK-NEXT: vmov r6, r5, d2
|
||||
; CHECK-NEXT: ldr r2, [r2]
|
||||
; CHECK-NEXT: ldr r6, [r6]
|
||||
; CHECK-NEXT: ldr r5, [r5]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r6, r2
|
||||
; CHECK-NEXT: ldr r6, [r4]
|
||||
; CHECK-NEXT: vmov r0, r2, d5
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r5, r6
|
||||
; CHECK-NEXT: vmov r6, r5, d4
|
||||
; CHECK-NEXT: ldr r0, [r0]
|
||||
; CHECK-NEXT: ldr r6, [r6]
|
||||
; CHECK-NEXT: ldr r2, [r2]
|
||||
; CHECK-NEXT: ldr r7, [r7]
|
||||
; CHECK-NEXT: ldr r0, [r0]
|
||||
; CHECK-NEXT: ldr r4, [r4]
|
||||
; CHECK-NEXT: ldr.w lr, [r1]
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: ldr r3, [r1]
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r6, r5
|
||||
; CHECK-NEXT: vmov r6, s7
|
||||
; CHECK-NEXT: vmov r5, s11
|
||||
; CHECK-NEXT: ldr r1, [r1]
|
||||
; CHECK-NEXT: ldr r6, [r6]
|
||||
; CHECK-NEXT: ldr r5, [r5]
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r7, r6
|
||||
; CHECK-NEXT: vmov r6, s12
|
||||
; CHECK-NEXT: ldr r6, [r6]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r6, r0
|
||||
; CHECK-NEXT: vmov r0, s15
|
||||
; CHECK-NEXT: vmov r6, s13
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r3, lr
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r2, r1
|
||||
; CHECK-NEXT: ldr r0, [r0]
|
||||
; CHECK-NEXT: ldr r6, [r6]
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r6, r0
|
||||
; CHECK-NEXT: vmov r0, s9
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r4, r12
|
||||
; CHECK-NEXT: ldr r0, [r0]
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r0, r5
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r6, r0
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r5, r2
|
||||
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
|
||||
entry:
|
||||
%offs = load <16 x i32*>, <16 x i32*>* %offptr, align 4
|
||||
|
@ -149,25 +137,23 @@ entry:
|
|||
define arm_aapcs_vfpcc <8 x float> @ptr_v8f32(<8 x float*>* %offptr) {
|
||||
; CHECK-LABEL: ptr_v8f32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0, #16]
|
||||
; CHECK-NEXT: vmov r1, s7
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: vldr s3, [r1]
|
||||
; CHECK-NEXT: vmov r1, s6
|
||||
; CHECK-NEXT: vldr s2, [r1]
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vmov r12, r2, d1
|
||||
; CHECK-NEXT: vmov lr, r1, d0
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0, #16]
|
||||
; CHECK-NEXT: vmov r0, r3, d1
|
||||
; CHECK-NEXT: vmov r4, r5, d0
|
||||
; CHECK-NEXT: vldr s3, [r2]
|
||||
; CHECK-NEXT: vldr s2, [r12]
|
||||
; CHECK-NEXT: vldr s1, [r1]
|
||||
; CHECK-NEXT: vmov r1, s4
|
||||
; CHECK-NEXT: vldr s7, [r0]
|
||||
; CHECK-NEXT: vmov r0, s10
|
||||
; CHECK-NEXT: vldr s0, [r1]
|
||||
; CHECK-NEXT: vldr s7, [r3]
|
||||
; CHECK-NEXT: vldr s6, [r0]
|
||||
; CHECK-NEXT: vmov r0, s9
|
||||
; CHECK-NEXT: vldr s5, [r0]
|
||||
; CHECK-NEXT: vmov r0, s8
|
||||
; CHECK-NEXT: vldr s4, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: vldr s5, [r5]
|
||||
; CHECK-NEXT: vldr s0, [lr]
|
||||
; CHECK-NEXT: vldr s4, [r4]
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
entry:
|
||||
%offs = load <8 x float*>, <8 x float*>* %offptr, align 4
|
||||
%gather = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %offs, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef)
|
||||
|
@ -179,35 +165,31 @@ entry:
|
|||
define arm_aapcs_vfpcc <8 x i16> @ptr_i16(<8 x i16*>* %offptr) {
|
||||
; CHECK-LABEL: ptr_i16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: .save {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0, #16]
|
||||
; CHECK-NEXT: vmov r1, r2, d0
|
||||
; CHECK-NEXT: vmov r3, r12, d1
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0, #16]
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r5, s1
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r4, s7
|
||||
; CHECK-NEXT: ldrh.w r12, [r1]
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: ldrh.w lr, [r2]
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: ldrh r3, [r3]
|
||||
; CHECK-NEXT: vmov r4, r5, d0
|
||||
; CHECK-NEXT: vmov r0, lr, d1
|
||||
; CHECK-NEXT: ldrh r1, [r1]
|
||||
; CHECK-NEXT: ldrh r6, [r3]
|
||||
; CHECK-NEXT: ldrh r2, [r2]
|
||||
; CHECK-NEXT: ldrh r4, [r4]
|
||||
; CHECK-NEXT: ldrh r5, [r5]
|
||||
; CHECK-NEXT: vmov.16 q0[0], r3
|
||||
; CHECK-NEXT: vmov.16 q0[0], r4
|
||||
; CHECK-NEXT: ldrh r0, [r0]
|
||||
; CHECK-NEXT: vmov.16 q0[1], r5
|
||||
; CHECK-NEXT: ldrh r4, [r4]
|
||||
; CHECK-NEXT: vmov.16 q0[2], r12
|
||||
; CHECK-NEXT: vmov.16 q0[3], lr
|
||||
; CHECK-NEXT: vmov.16 q0[4], r0
|
||||
; CHECK-NEXT: ldrh r1, [r1]
|
||||
; CHECK-NEXT: ldrh r2, [r2]
|
||||
; CHECK-NEXT: vmov.16 q0[5], r1
|
||||
; CHECK-NEXT: vmov.16 q0[6], r2
|
||||
; CHECK-NEXT: vmov.16 q0[7], r4
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
; CHECK-NEXT: ldrh.w r3, [lr]
|
||||
; CHECK-NEXT: vmov.16 q0[2], r0
|
||||
; CHECK-NEXT: ldrh.w r12, [r12]
|
||||
; CHECK-NEXT: vmov.16 q0[3], r3
|
||||
; CHECK-NEXT: vmov.16 q0[4], r1
|
||||
; CHECK-NEXT: vmov.16 q0[5], r2
|
||||
; CHECK-NEXT: vmov.16 q0[6], r6
|
||||
; CHECK-NEXT: vmov.16 q0[7], r12
|
||||
; CHECK-NEXT: pop {r4, r5, r6, pc}
|
||||
entry:
|
||||
%offs = load <8 x i16*>, <8 x i16*>* %offptr, align 4
|
||||
%gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %offs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
|
||||
|
@ -253,10 +235,8 @@ define arm_aapcs_vfpcc <4 x i32> @ptr_v4i16_sext(<4 x i16*>* %offptr) {
|
|||
; CHECK-LABEL: ptr_v4i16_sext:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: vmov r2, r3, d0
|
||||
; CHECK-NEXT: ldrh r0, [r0]
|
||||
; CHECK-NEXT: ldrh r2, [r2]
|
||||
; CHECK-NEXT: ldrh r1, [r1]
|
||||
|
@ -276,10 +256,8 @@ define arm_aapcs_vfpcc <4 x i32> @ptr_v4i16_zext(<4 x i16*>* %offptr) {
|
|||
; CHECK-LABEL: ptr_v4i16_zext:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: vmov r2, r3, d0
|
||||
; CHECK-NEXT: ldrh r0, [r0]
|
||||
; CHECK-NEXT: ldrh r2, [r2]
|
||||
; CHECK-NEXT: ldrh r1, [r1]
|
||||
|
@ -298,33 +276,29 @@ entry:
|
|||
define arm_aapcs_vfpcc <8 x i32> @ptr_v8i16_sext(<8 x i16*>* %offptr) {
|
||||
; CHECK-LABEL: ptr_v8i16_sext:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0, #16]
|
||||
; CHECK-NEXT: vmov r1, r2, d1
|
||||
; CHECK-NEXT: vmov r12, r3, d0
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0, #16]
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: vmov r4, s5
|
||||
; CHECK-NEXT: vmov r5, s1
|
||||
; CHECK-NEXT: ldrh.w r12, [r1]
|
||||
; CHECK-NEXT: vmov r1, s4
|
||||
; CHECK-NEXT: ldrh.w lr, [r2]
|
||||
; CHECK-NEXT: vmov r2, s7
|
||||
; CHECK-NEXT: ldrh r0, [r0]
|
||||
; CHECK-NEXT: ldrh r3, [r3]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], lr, r12
|
||||
; CHECK-NEXT: ldrh r4, [r4]
|
||||
; CHECK-NEXT: ldrh r5, [r5]
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r5, r3
|
||||
; CHECK-NEXT: vmovlb.s16 q0, q0
|
||||
; CHECK-NEXT: vmov r4, r5, d0
|
||||
; CHECK-NEXT: vmov r0, lr, d1
|
||||
; CHECK-NEXT: ldrh r7, [r2]
|
||||
; CHECK-NEXT: ldrh r1, [r1]
|
||||
; CHECK-NEXT: ldrh r2, [r2]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r4, r2
|
||||
; CHECK-NEXT: ldrh.w r2, [r12]
|
||||
; CHECK-NEXT: ldrh r4, [r4]
|
||||
; CHECK-NEXT: ldrh r0, [r0]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r2, r1
|
||||
; CHECK-NEXT: ldrh r3, [r3]
|
||||
; CHECK-NEXT: ldrh.w r6, [lr]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r4, r0
|
||||
; CHECK-NEXT: ldrh r5, [r5]
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r3, r7
|
||||
; CHECK-NEXT: vmovlb.s16 q1, q1
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r5, r6
|
||||
; CHECK-NEXT: vmovlb.s16 q0, q0
|
||||
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
|
||||
entry:
|
||||
%offs = load <8 x i16*>, <8 x i16*>* %offptr, align 4
|
||||
%gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %offs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
|
||||
|
@ -335,33 +309,29 @@ entry:
|
|||
define arm_aapcs_vfpcc <8 x i32> @ptr_v8i16_zext(<8 x i16*>* %offptr) {
|
||||
; CHECK-LABEL: ptr_v8i16_zext:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0, #16]
|
||||
; CHECK-NEXT: vmov r1, r2, d1
|
||||
; CHECK-NEXT: vmov r12, r3, d0
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0, #16]
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: vmov r4, s5
|
||||
; CHECK-NEXT: vmov r5, s1
|
||||
; CHECK-NEXT: ldrh.w r12, [r1]
|
||||
; CHECK-NEXT: vmov r1, s4
|
||||
; CHECK-NEXT: ldrh.w lr, [r2]
|
||||
; CHECK-NEXT: vmov r2, s7
|
||||
; CHECK-NEXT: ldrh r0, [r0]
|
||||
; CHECK-NEXT: ldrh r3, [r3]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], lr, r12
|
||||
; CHECK-NEXT: ldrh r4, [r4]
|
||||
; CHECK-NEXT: ldrh r5, [r5]
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r5, r3
|
||||
; CHECK-NEXT: vmovlb.u16 q0, q0
|
||||
; CHECK-NEXT: vmov r4, r5, d0
|
||||
; CHECK-NEXT: vmov r0, lr, d1
|
||||
; CHECK-NEXT: ldrh r7, [r2]
|
||||
; CHECK-NEXT: ldrh r1, [r1]
|
||||
; CHECK-NEXT: ldrh r2, [r2]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r4, r2
|
||||
; CHECK-NEXT: ldrh.w r2, [r12]
|
||||
; CHECK-NEXT: ldrh r4, [r4]
|
||||
; CHECK-NEXT: ldrh r0, [r0]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r2, r1
|
||||
; CHECK-NEXT: ldrh r3, [r3]
|
||||
; CHECK-NEXT: ldrh.w r6, [lr]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r4, r0
|
||||
; CHECK-NEXT: ldrh r5, [r5]
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r3, r7
|
||||
; CHECK-NEXT: vmovlb.u16 q1, q1
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r5, r6
|
||||
; CHECK-NEXT: vmovlb.u16 q0, q0
|
||||
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
|
||||
entry:
|
||||
%offs = load <8 x i16*>, <8 x i16*>* %offptr, align 4
|
||||
%gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %offs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
|
||||
|
@ -375,27 +345,23 @@ define arm_aapcs_vfpcc <8 x half> @ptr_f16(<8 x half*>* %offptr) {
|
|||
; CHECK-LABEL: ptr_f16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: vldr.16 s8, [r1]
|
||||
; CHECK-NEXT: vmov r1, s4
|
||||
; CHECK-NEXT: vmov r1, r2, d2
|
||||
; CHECK-NEXT: vldr.16 s8, [r2]
|
||||
; CHECK-NEXT: vldr.16 s0, [r1]
|
||||
; CHECK-NEXT: vmov r2, s7
|
||||
; CHECK-NEXT: vmov r1, s6
|
||||
; CHECK-NEXT: vldr.16 s4, [r2]
|
||||
; CHECK-NEXT: vmov r1, r2, d3
|
||||
; CHECK-NEXT: vins.f16 s0, s8
|
||||
; CHECK-NEXT: vldr.16 s4, [r2]
|
||||
; CHECK-NEXT: vldr.16 s1, [r1]
|
||||
; CHECK-NEXT: vins.f16 s1, s4
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0, #16]
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: vldr.16 s2, [r0]
|
||||
; CHECK-NEXT: vmov r0, r1, d2
|
||||
; CHECK-NEXT: vldr.16 s8, [r1]
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vldr.16 s2, [r0]
|
||||
; CHECK-NEXT: vmov r0, r1, d3
|
||||
; CHECK-NEXT: vins.f16 s2, s8
|
||||
; CHECK-NEXT: vldr.16 s8, [r0]
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vldr.16 s4, [r1]
|
||||
; CHECK-NEXT: vldr.16 s3, [r0]
|
||||
; CHECK-NEXT: vins.f16 s3, s8
|
||||
; CHECK-NEXT: vins.f16 s3, s4
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%offs = load <8 x half*>, <8 x half*>* %offptr, align 4
|
||||
|
@ -408,61 +374,53 @@ entry:
|
|||
define arm_aapcs_vfpcc <16 x i8> @ptr_i8(<16 x i8*>* %offptr) {
|
||||
; CHECK-LABEL: ptr_i8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0, #32]
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0, #48]
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0]
|
||||
; CHECK-NEXT: vmov r1, s6
|
||||
; CHECK-NEXT: vmov r5, s8
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: vmov r6, s11
|
||||
; CHECK-NEXT: vmov r4, s7
|
||||
; CHECK-NEXT: ldrb.w r12, [r1]
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: ldrb r5, [r5]
|
||||
; CHECK-NEXT: ldrb r2, [r2]
|
||||
; CHECK-NEXT: ldrb r6, [r6]
|
||||
; CHECK-NEXT: ldrb r4, [r4]
|
||||
; CHECK-NEXT: ldrb.w lr, [r1]
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: ldrb r3, [r1]
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov.8 q0[0], r5
|
||||
; CHECK-NEXT: vmov r5, s9
|
||||
; CHECK-NEXT: ldrb r1, [r1]
|
||||
; CHECK-NEXT: ldrb r5, [r5]
|
||||
; CHECK-NEXT: vmov.8 q0[1], r5
|
||||
; CHECK-NEXT: vmov r5, s10
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0, #32]
|
||||
; CHECK-NEXT: vmov r1, r2, d0
|
||||
; CHECK-NEXT: vmov r6, r7, d4
|
||||
; CHECK-NEXT: vmov r4, r3, d1
|
||||
; CHECK-NEXT: ldrb r5, [r1]
|
||||
; CHECK-NEXT: ldrb r1, [r2]
|
||||
; CHECK-NEXT: ldrb r2, [r6]
|
||||
; CHECK-NEXT: ldrb.w r12, [r3]
|
||||
; CHECK-NEXT: vmov.8 q0[0], r2
|
||||
; CHECK-NEXT: vmov r2, r3, d3
|
||||
; CHECK-NEXT: ldrb.w lr, [r4]
|
||||
; CHECK-NEXT: ldrb r4, [r2]
|
||||
; CHECK-NEXT: ldrb r2, [r3]
|
||||
; CHECK-NEXT: ldrb r3, [r7]
|
||||
; CHECK-NEXT: vmov.8 q0[1], r3
|
||||
; CHECK-NEXT: vmov r3, r6, d5
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0, #16]
|
||||
; CHECK-NEXT: vmov r0, s8
|
||||
; CHECK-NEXT: ldrb r5, [r5]
|
||||
; CHECK-NEXT: vmov.8 q0[2], r5
|
||||
; CHECK-NEXT: vmov r5, s4
|
||||
; CHECK-NEXT: ldrb r0, [r0]
|
||||
; CHECK-NEXT: ldrb r3, [r3]
|
||||
; CHECK-NEXT: ldrb r6, [r6]
|
||||
; CHECK-NEXT: vmov.8 q0[2], r3
|
||||
; CHECK-NEXT: vmov r0, r3, d4
|
||||
; CHECK-NEXT: vmov.8 q0[3], r6
|
||||
; CHECK-NEXT: ldrb r0, [r0]
|
||||
; CHECK-NEXT: ldrb r3, [r3]
|
||||
; CHECK-NEXT: vmov.8 q0[4], r0
|
||||
; CHECK-NEXT: vmov r0, s9
|
||||
; CHECK-NEXT: ldrb r5, [r5]
|
||||
; CHECK-NEXT: ldrb r0, [r0]
|
||||
; CHECK-NEXT: vmov.8 q0[5], r0
|
||||
; CHECK-NEXT: vmov r0, s10
|
||||
; CHECK-NEXT: vmov.8 q0[5], r3
|
||||
; CHECK-NEXT: vmov r0, r3, d5
|
||||
; CHECK-NEXT: ldrb r0, [r0]
|
||||
; CHECK-NEXT: ldrb r3, [r3]
|
||||
; CHECK-NEXT: vmov.8 q0[6], r0
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: vmov.8 q0[7], r3
|
||||
; CHECK-NEXT: vmov r0, r3, d2
|
||||
; CHECK-NEXT: ldrb r0, [r0]
|
||||
; CHECK-NEXT: vmov.8 q0[7], r0
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov.8 q0[8], r5
|
||||
; CHECK-NEXT: ldrb r0, [r0]
|
||||
; CHECK-NEXT: vmov.8 q0[9], r0
|
||||
; CHECK-NEXT: vmov.8 q0[10], r12
|
||||
; CHECK-NEXT: vmov.8 q0[11], r4
|
||||
; CHECK-NEXT: vmov.8 q0[12], lr
|
||||
; CHECK-NEXT: vmov.8 q0[13], r3
|
||||
; CHECK-NEXT: vmov.8 q0[14], r1
|
||||
; CHECK-NEXT: vmov.8 q0[15], r2
|
||||
; CHECK-NEXT: pop {r4, r5, r6, pc}
|
||||
; CHECK-NEXT: ldrb r3, [r3]
|
||||
; CHECK-NEXT: vmov.8 q0[8], r0
|
||||
; CHECK-NEXT: vmov.8 q0[9], r3
|
||||
; CHECK-NEXT: vmov.8 q0[10], r4
|
||||
; CHECK-NEXT: vmov.8 q0[11], r2
|
||||
; CHECK-NEXT: vmov.8 q0[12], r5
|
||||
; CHECK-NEXT: vmov.8 q0[13], r1
|
||||
; CHECK-NEXT: vmov.8 q0[14], lr
|
||||
; CHECK-NEXT: vmov.8 q0[15], r12
|
||||
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
|
||||
entry:
|
||||
%offs = load <16 x i8*>, <16 x i8*>* %offptr, align 4
|
||||
%gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %offs, i32 2, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
|
||||
|
@ -472,36 +430,32 @@ entry:
|
|||
define arm_aapcs_vfpcc <8 x i16> @ptr_v8i8_sext16(<8 x i8*>* %offptr) {
|
||||
; CHECK-LABEL: ptr_v8i8_sext16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0, #16]
|
||||
; CHECK-NEXT: vmov r3, r1, d1
|
||||
; CHECK-NEXT: vmov r12, r2, d0
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0, #16]
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r5, s0
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r4, s7
|
||||
; CHECK-NEXT: ldrb.w r12, [r2]
|
||||
; CHECK-NEXT: vmov r2, s5
|
||||
; CHECK-NEXT: ldrb.w lr, [r1]
|
||||
; CHECK-NEXT: vmov r1, s6
|
||||
; CHECK-NEXT: ldrb r5, [r5]
|
||||
; CHECK-NEXT: ldrb r3, [r3]
|
||||
; CHECK-NEXT: vmov.16 q0[0], r5
|
||||
; CHECK-NEXT: ldrb r0, [r0]
|
||||
; CHECK-NEXT: vmov.16 q0[1], lr
|
||||
; CHECK-NEXT: ldrb r4, [r4]
|
||||
; CHECK-NEXT: vmov.16 q0[2], r3
|
||||
; CHECK-NEXT: vmov.16 q0[3], r12
|
||||
; CHECK-NEXT: vmov.16 q0[4], r0
|
||||
; CHECK-NEXT: vmov r4, r5, d0
|
||||
; CHECK-NEXT: vmov lr, r0, d1
|
||||
; CHECK-NEXT: ldrb r7, [r1]
|
||||
; CHECK-NEXT: ldrb.w r1, [r12]
|
||||
; CHECK-NEXT: ldrb r2, [r2]
|
||||
; CHECK-NEXT: ldrb r1, [r1]
|
||||
; CHECK-NEXT: ldrb r4, [r4]
|
||||
; CHECK-NEXT: ldrb r5, [r5]
|
||||
; CHECK-NEXT: vmov.16 q0[0], r4
|
||||
; CHECK-NEXT: ldrb.w r6, [lr]
|
||||
; CHECK-NEXT: vmov.16 q0[1], r5
|
||||
; CHECK-NEXT: ldrb r0, [r0]
|
||||
; CHECK-NEXT: vmov.16 q0[2], r6
|
||||
; CHECK-NEXT: ldrb r3, [r3]
|
||||
; CHECK-NEXT: vmov.16 q0[3], r0
|
||||
; CHECK-NEXT: vmov.16 q0[4], r1
|
||||
; CHECK-NEXT: vmov.16 q0[5], r2
|
||||
; CHECK-NEXT: vmov.16 q0[6], r1
|
||||
; CHECK-NEXT: vmov.16 q0[7], r4
|
||||
; CHECK-NEXT: vmov.16 q0[6], r3
|
||||
; CHECK-NEXT: vmov.16 q0[7], r7
|
||||
; CHECK-NEXT: vmovlb.s8 q0, q0
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
|
||||
entry:
|
||||
%offs = load <8 x i8*>, <8 x i8*>* %offptr, align 4
|
||||
%gather = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> %offs, i32 1, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i8> undef)
|
||||
|
@ -512,36 +466,32 @@ entry:
|
|||
define arm_aapcs_vfpcc <8 x i16> @ptr_v8i8_zext16(<8 x i8*>* %offptr) {
|
||||
; CHECK-LABEL: ptr_v8i8_zext16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0, #16]
|
||||
; CHECK-NEXT: vmov r3, r1, d1
|
||||
; CHECK-NEXT: vmov r12, r2, d0
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0, #16]
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r5, s0
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r4, s7
|
||||
; CHECK-NEXT: ldrb.w r12, [r2]
|
||||
; CHECK-NEXT: vmov r2, s5
|
||||
; CHECK-NEXT: ldrb.w lr, [r1]
|
||||
; CHECK-NEXT: vmov r1, s6
|
||||
; CHECK-NEXT: ldrb r5, [r5]
|
||||
; CHECK-NEXT: ldrb r3, [r3]
|
||||
; CHECK-NEXT: vmov.16 q0[0], r5
|
||||
; CHECK-NEXT: ldrb r0, [r0]
|
||||
; CHECK-NEXT: vmov.16 q0[1], lr
|
||||
; CHECK-NEXT: ldrb r4, [r4]
|
||||
; CHECK-NEXT: vmov.16 q0[2], r3
|
||||
; CHECK-NEXT: vmov.16 q0[3], r12
|
||||
; CHECK-NEXT: vmov.16 q0[4], r0
|
||||
; CHECK-NEXT: vmov r4, r5, d0
|
||||
; CHECK-NEXT: vmov lr, r0, d1
|
||||
; CHECK-NEXT: ldrb r7, [r1]
|
||||
; CHECK-NEXT: ldrb.w r1, [r12]
|
||||
; CHECK-NEXT: ldrb r2, [r2]
|
||||
; CHECK-NEXT: ldrb r1, [r1]
|
||||
; CHECK-NEXT: ldrb r4, [r4]
|
||||
; CHECK-NEXT: ldrb r5, [r5]
|
||||
; CHECK-NEXT: vmov.16 q0[0], r4
|
||||
; CHECK-NEXT: ldrb.w r6, [lr]
|
||||
; CHECK-NEXT: vmov.16 q0[1], r5
|
||||
; CHECK-NEXT: ldrb r0, [r0]
|
||||
; CHECK-NEXT: vmov.16 q0[2], r6
|
||||
; CHECK-NEXT: ldrb r3, [r3]
|
||||
; CHECK-NEXT: vmov.16 q0[3], r0
|
||||
; CHECK-NEXT: vmov.16 q0[4], r1
|
||||
; CHECK-NEXT: vmov.16 q0[5], r2
|
||||
; CHECK-NEXT: vmov.16 q0[6], r1
|
||||
; CHECK-NEXT: vmov.16 q0[7], r4
|
||||
; CHECK-NEXT: vmov.16 q0[6], r3
|
||||
; CHECK-NEXT: vmov.16 q0[7], r7
|
||||
; CHECK-NEXT: vmovlb.u8 q0, q0
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
|
||||
entry:
|
||||
%offs = load <8 x i8*>, <8 x i8*>* %offptr, align 4
|
||||
%gather = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> %offs, i32 1, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i8> undef)
|
||||
|
@ -553,16 +503,14 @@ define arm_aapcs_vfpcc <4 x i32> @ptr_v4i8_sext32(<4 x i8*>* %offptr) {
|
|||
; CHECK-LABEL: ptr_v4i8_sext32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: vmov r2, r3, d0
|
||||
; CHECK-NEXT: ldrb r0, [r0]
|
||||
; CHECK-NEXT: ldrb r1, [r1]
|
||||
; CHECK-NEXT: ldrb r2, [r2]
|
||||
; CHECK-NEXT: ldrb r1, [r1]
|
||||
; CHECK-NEXT: ldrb r3, [r3]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, r0
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r1
|
||||
; CHECK-NEXT: vmovlb.s8 q0, q0
|
||||
; CHECK-NEXT: vmovlb.s16 q0, q0
|
||||
; CHECK-NEXT: bx lr
|
||||
|
@ -577,18 +525,16 @@ define arm_aapcs_vfpcc <4 x i32> @ptr_v4i8_zext32(<4 x i8*>* %offptr) {
|
|||
; CHECK-LABEL: ptr_v4i8_zext32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vmov.i32 q1, #0xff
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: ldrb r1, [r1]
|
||||
; CHECK-NEXT: ldrb r2, [r2]
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: vmov r2, r3, d0
|
||||
; CHECK-NEXT: vmov.i32 q0, #0xff
|
||||
; CHECK-NEXT: ldrb r0, [r0]
|
||||
; CHECK-NEXT: ldrb r2, [r2]
|
||||
; CHECK-NEXT: ldrb r1, [r1]
|
||||
; CHECK-NEXT: ldrb r3, [r3]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, r1
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r0
|
||||
; CHECK-NEXT: vand q0, q0, q1
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r2, r0
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r3, r1
|
||||
; CHECK-NEXT: vand q0, q1, q0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%offs = load <4 x i8*>, <4 x i8*>* %offptr, align 4
|
||||
|
@ -600,35 +546,31 @@ entry:
|
|||
define arm_aapcs_vfpcc <8 x i32> @ptr_v8i8_sext32(<8 x i8*>* %offptr) {
|
||||
; CHECK-LABEL: ptr_v8i8_sext32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0, #16]
|
||||
; CHECK-NEXT: vmov r1, r2, d1
|
||||
; CHECK-NEXT: vmov r3, r12, d0
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0, #16]
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: vmov r4, s5
|
||||
; CHECK-NEXT: vmov r5, s1
|
||||
; CHECK-NEXT: ldrb.w r12, [r1]
|
||||
; CHECK-NEXT: vmov r1, s4
|
||||
; CHECK-NEXT: ldrb.w lr, [r2]
|
||||
; CHECK-NEXT: vmov r2, s7
|
||||
; CHECK-NEXT: ldrb r0, [r0]
|
||||
; CHECK-NEXT: vmov r0, lr, d1
|
||||
; CHECK-NEXT: ldrb r7, [r2]
|
||||
; CHECK-NEXT: vmov r2, r4, d0
|
||||
; CHECK-NEXT: ldrb r6, [r1]
|
||||
; CHECK-NEXT: ldrb r3, [r3]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], lr, r12
|
||||
; CHECK-NEXT: ldrb r4, [r4]
|
||||
; CHECK-NEXT: ldrb r5, [r5]
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r5, r3
|
||||
; CHECK-NEXT: vmovlb.s8 q0, q0
|
||||
; CHECK-NEXT: vmovlb.s16 q0, q0
|
||||
; CHECK-NEXT: ldrb r1, [r1]
|
||||
; CHECK-NEXT: ldrb r2, [r2]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r4, r2
|
||||
; CHECK-NEXT: ldrb r0, [r0]
|
||||
; CHECK-NEXT: ldrb.w r1, [r12]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r6
|
||||
; CHECK-NEXT: ldrb.w r5, [lr]
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r1, r7
|
||||
; CHECK-NEXT: vmovlb.s8 q1, q1
|
||||
; CHECK-NEXT: vmovlb.s16 q1, q1
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
; CHECK-NEXT: ldrb r2, [r2]
|
||||
; CHECK-NEXT: ldrb r4, [r4]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, r0
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r4, r5
|
||||
; CHECK-NEXT: vmovlb.s8 q0, q0
|
||||
; CHECK-NEXT: vmovlb.s16 q0, q0
|
||||
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
|
||||
entry:
|
||||
%offs = load <8 x i8*>, <8 x i8*>* %offptr, align 4
|
||||
%gather = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> %offs, i32 1, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i8> undef)
|
||||
|
@ -639,34 +581,30 @@ entry:
|
|||
define arm_aapcs_vfpcc <8 x i32> @ptr_v8i8_zext32(<8 x i8*>* %offptr) {
|
||||
; CHECK-LABEL: ptr_v8i8_zext32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0, #16]
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r3, s4
|
||||
; CHECK-NEXT: vmov r4, s3
|
||||
; CHECK-NEXT: vmov r5, s1
|
||||
; CHECK-NEXT: ldrb.w r12, [r1]
|
||||
; CHECK-NEXT: vmov r1, s7
|
||||
; CHECK-NEXT: ldrb.w lr, [r0]
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: ldrb r2, [r2]
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0, #16]
|
||||
; CHECK-NEXT: vmov.i32 q1, #0xff
|
||||
; CHECK-NEXT: ldrb r3, [r3]
|
||||
; CHECK-NEXT: ldrb r4, [r4]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, r12
|
||||
; CHECK-NEXT: ldrb r5, [r5]
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r3, lr
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r5, r4
|
||||
; CHECK-NEXT: vand q0, q0, q1
|
||||
; CHECK-NEXT: vmov r1, r2, d1
|
||||
; CHECK-NEXT: vmov r12, r3, d0
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vmov r4, r5, d0
|
||||
; CHECK-NEXT: vmov r0, lr, d1
|
||||
; CHECK-NEXT: ldrb r7, [r2]
|
||||
; CHECK-NEXT: ldrb r1, [r1]
|
||||
; CHECK-NEXT: ldrb.w r2, [r12]
|
||||
; CHECK-NEXT: ldrb r4, [r4]
|
||||
; CHECK-NEXT: ldrb r0, [r0]
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r0, r1
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r2, r1
|
||||
; CHECK-NEXT: ldrb r3, [r3]
|
||||
; CHECK-NEXT: ldrb.w r6, [lr]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r4, r0
|
||||
; CHECK-NEXT: ldrb r5, [r5]
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r3, r7
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r5, r6
|
||||
; CHECK-NEXT: vand q0, q0, q1
|
||||
; CHECK-NEXT: vand q1, q2, q1
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
|
||||
entry:
|
||||
%offs = load <8 x i8*>, <8 x i8*>* %offptr, align 4
|
||||
%gather = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> %offs, i32 1, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i8> undef)
|
||||
|
@ -783,37 +721,33 @@ entry:
|
|||
define arm_aapcs_vfpcc <8 x i32> @sext_unsigned_unscaled_i8_i8_toi64(i8* %base, <8 x i8>* %offptr) {
|
||||
; CHECK-LABEL: sext_unsigned_unscaled_i8_i8_toi64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vldrb.u32 q0, [r1]
|
||||
; CHECK-NEXT: vldrb.u32 q1, [r1, #4]
|
||||
; CHECK-NEXT: .save {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: vldrb.u32 q0, [r1, #4]
|
||||
; CHECK-NEXT: vadd.i32 q0, q0, r0
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov r1, s4
|
||||
; CHECK-NEXT: vmov r4, s5
|
||||
; CHECK-NEXT: vmov r5, s1
|
||||
; CHECK-NEXT: ldrb.w r12, [r2]
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: ldrb.w lr, [r3]
|
||||
; CHECK-NEXT: vmov r3, s7
|
||||
; CHECK-NEXT: ldrb r0, [r0]
|
||||
; CHECK-NEXT: ldrb r1, [r1]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], lr, r12
|
||||
; CHECK-NEXT: ldrb r4, [r4]
|
||||
; CHECK-NEXT: ldrb r5, [r5]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
|
||||
; CHECK-NEXT: ldrb r2, [r2]
|
||||
; CHECK-NEXT: vmov r2, r12, d1
|
||||
; CHECK-NEXT: vmov r3, lr, d0
|
||||
; CHECK-NEXT: vldrb.u32 q0, [r1]
|
||||
; CHECK-NEXT: vadd.i32 q0, q0, r0
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: vmov r4, r5, d0
|
||||
; CHECK-NEXT: ldrb r6, [r2]
|
||||
; CHECK-NEXT: ldrb r3, [r3]
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r5, r2
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r4, r3
|
||||
; CHECK-NEXT: vmovlb.s8 q0, q0
|
||||
; CHECK-NEXT: ldrb.w r12, [r12]
|
||||
; CHECK-NEXT: ldrb.w r2, [lr]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r6
|
||||
; CHECK-NEXT: ldrb r0, [r0]
|
||||
; CHECK-NEXT: ldrb r4, [r4]
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r2, r12
|
||||
; CHECK-NEXT: ldrb r1, [r1]
|
||||
; CHECK-NEXT: vmovlb.s8 q1, q1
|
||||
; CHECK-NEXT: vmovlb.s16 q0, q0
|
||||
; CHECK-NEXT: ldrb r5, [r5]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r4, r0
|
||||
; CHECK-NEXT: vmovlb.s16 q1, q1
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r5, r1
|
||||
; CHECK-NEXT: vmovlb.s8 q0, q0
|
||||
; CHECK-NEXT: vmovlb.s16 q0, q0
|
||||
; CHECK-NEXT: pop {r4, r5, r6, pc}
|
||||
entry:
|
||||
%offs = load <8 x i8>, <8 x i8>* %offptr, align 1
|
||||
%offs.zext = zext <8 x i8> %offs to <8 x i32>
|
||||
|
|
|
@ -7,16 +7,14 @@ define arm_aapcs_vfpcc <4 x i32> @unscaled_i32_i32_gather(i8* %base, <4 x i32>*
|
|||
; NOGATSCAT: @ %bb.0: @ %entry
|
||||
; NOGATSCAT-NEXT: vldrw.u32 q0, [r1]
|
||||
; NOGATSCAT-NEXT: vadd.i32 q0, q0, r0
|
||||
; NOGATSCAT-NEXT: vmov r0, s2
|
||||
; NOGATSCAT-NEXT: vmov r1, s0
|
||||
; NOGATSCAT-NEXT: vmov r2, s3
|
||||
; NOGATSCAT-NEXT: vmov r3, s1
|
||||
; NOGATSCAT-NEXT: vmov r0, r1, d1
|
||||
; NOGATSCAT-NEXT: vmov r2, r3, d0
|
||||
; NOGATSCAT-NEXT: ldr r0, [r0]
|
||||
; NOGATSCAT-NEXT: ldr r1, [r1]
|
||||
; NOGATSCAT-NEXT: ldr r2, [r2]
|
||||
; NOGATSCAT-NEXT: ldr r1, [r1]
|
||||
; NOGATSCAT-NEXT: ldr r3, [r3]
|
||||
; NOGATSCAT-NEXT: vmov q0[2], q0[0], r1, r0
|
||||
; NOGATSCAT-NEXT: vmov q0[3], q0[1], r3, r2
|
||||
; NOGATSCAT-NEXT: vmov q0[2], q0[0], r2, r0
|
||||
; NOGATSCAT-NEXT: vmov q0[3], q0[1], r3, r1
|
||||
; NOGATSCAT-NEXT: bx lr
|
||||
;
|
||||
; NOMVE-LABEL: unscaled_i32_i32_gather:
|
||||
|
@ -46,21 +44,19 @@ declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <
|
|||
define arm_aapcs_vfpcc void @unscaled_i32_i8_scatter(i8* %base, <4 x i8>* %offptr, <4 x i32> %input) {
|
||||
; NOGATSCAT-LABEL: unscaled_i32_i8_scatter:
|
||||
; NOGATSCAT: @ %bb.0: @ %entry
|
||||
; NOGATSCAT-NEXT: .save {r4, r5, r7, lr}
|
||||
; NOGATSCAT-NEXT: push {r4, r5, r7, lr}
|
||||
; NOGATSCAT-NEXT: vldrb.u32 q1, [r1]
|
||||
; NOGATSCAT-NEXT: vmov r1, s0
|
||||
; NOGATSCAT-NEXT: vmov r1, r3, d0
|
||||
; NOGATSCAT-NEXT: vmov r4, r5, d1
|
||||
; NOGATSCAT-NEXT: vadd.i32 q1, q1, r0
|
||||
; NOGATSCAT-NEXT: vmov r0, s4
|
||||
; NOGATSCAT-NEXT: vmov r0, r12, d2
|
||||
; NOGATSCAT-NEXT: vmov r2, lr, d3
|
||||
; NOGATSCAT-NEXT: str r1, [r0]
|
||||
; NOGATSCAT-NEXT: vmov r0, s5
|
||||
; NOGATSCAT-NEXT: vmov r1, s1
|
||||
; NOGATSCAT-NEXT: str r1, [r0]
|
||||
; NOGATSCAT-NEXT: vmov r0, s6
|
||||
; NOGATSCAT-NEXT: vmov r1, s2
|
||||
; NOGATSCAT-NEXT: str r1, [r0]
|
||||
; NOGATSCAT-NEXT: vmov r0, s7
|
||||
; NOGATSCAT-NEXT: vmov r1, s3
|
||||
; NOGATSCAT-NEXT: str r1, [r0]
|
||||
; NOGATSCAT-NEXT: bx lr
|
||||
; NOGATSCAT-NEXT: str.w r3, [r12]
|
||||
; NOGATSCAT-NEXT: str r4, [r2]
|
||||
; NOGATSCAT-NEXT: str.w r5, [lr]
|
||||
; NOGATSCAT-NEXT: pop {r4, r5, r7, pc}
|
||||
;
|
||||
; NOMVE-LABEL: unscaled_i32_i8_scatter:
|
||||
; NOMVE: @ %bb.0: @ %entry
|
||||
|
|
|
@ -4,62 +4,58 @@
|
|||
define arm_aapcs_vfpcc <4 x i32> @loads_i32(<4 x i32> *%A, <4 x i32> *%B, <4 x i32> *%C) {
|
||||
; CHECK-LABEL: loads_i32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, lr}
|
||||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
|
||||
; CHECK-NEXT: .save {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r1]
|
||||
; CHECK-NEXT: vmov.i64 q0, #0xffffffff
|
||||
; CHECK-NEXT: vldrw.u32 q5, [r2]
|
||||
; CHECK-NEXT: vldrw.u32 q6, [r1]
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
||||
; CHECK-NEXT: vmov.i64 q3, #0xffffffff
|
||||
; CHECK-NEXT: vmov.f32 s8, s20
|
||||
; CHECK-NEXT: vmov.f32 s16, s22
|
||||
; CHECK-NEXT: vmov.f32 s10, s21
|
||||
; CHECK-NEXT: vmov.f32 s18, s23
|
||||
; CHECK-NEXT: vmov.f32 s20, s26
|
||||
; CHECK-NEXT: vmov.f32 s22, s27
|
||||
; CHECK-NEXT: vmov.f32 s0, s6
|
||||
; CHECK-NEXT: vand q5, q5, q3
|
||||
; CHECK-NEXT: vmov.f32 s2, s7
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: vmov r2, s20
|
||||
; CHECK-NEXT: vmov r1, s21
|
||||
; CHECK-NEXT: vmov.f32 s26, s25
|
||||
; CHECK-NEXT: vand q3, q6, q3
|
||||
; CHECK-NEXT: vmov.f32 s8, s6
|
||||
; CHECK-NEXT: vmov.f32 s10, s7
|
||||
; CHECK-NEXT: vmov.f32 s6, s5
|
||||
; CHECK-NEXT: asrs r3, r0, #31
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adcs r1, r3
|
||||
; CHECK-NEXT: vmov r2, s16
|
||||
; CHECK-NEXT: asrl r0, r1, r2
|
||||
; CHECK-NEXT: vmov r2, s12
|
||||
; CHECK-NEXT: vand q2, q2, q0
|
||||
; CHECK-NEXT: vand q0, q1, q0
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
||||
; CHECK-NEXT: vmov r4, r1, d4
|
||||
; CHECK-NEXT: vmov.f32 s12, s6
|
||||
; CHECK-NEXT: vmov.f32 s14, s7
|
||||
; CHECK-NEXT: vmov r5, s12
|
||||
; CHECK-NEXT: vmov.f32 s16, s22
|
||||
; CHECK-NEXT: vmov.f32 s18, s23
|
||||
; CHECK-NEXT: vmov r3, lr, d0
|
||||
; CHECK-NEXT: vmov.f32 s6, s5
|
||||
; CHECK-NEXT: vmov r0, r12, d5
|
||||
; CHECK-NEXT: vmov.f32 s8, s20
|
||||
; CHECK-NEXT: vmov.f32 s10, s21
|
||||
; CHECK-NEXT: adds r2, r5, r4
|
||||
; CHECK-NEXT: vmov r4, s16
|
||||
; CHECK-NEXT: asr.w r6, r5, #31
|
||||
; CHECK-NEXT: adcs r1, r6
|
||||
; CHECK-NEXT: asrl r2, r1, r4
|
||||
; CHECK-NEXT: vmov r1, s4
|
||||
; CHECK-NEXT: vmov r3, s13
|
||||
; CHECK-NEXT: adds r4, r1, r2
|
||||
; CHECK-NEXT: vmov r2, s8
|
||||
; CHECK-NEXT: asr.w r12, r1, #31
|
||||
; CHECK-NEXT: adc.w r1, r12, r3
|
||||
; CHECK-NEXT: asrl r4, r1, r2
|
||||
; CHECK-NEXT: vmov r2, s22
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r4, r0
|
||||
; CHECK-NEXT: vmov r3, s23
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov r4, s14
|
||||
; CHECK-NEXT: adds r2, r2, r1
|
||||
; CHECK-NEXT: asr.w r12, r1, #31
|
||||
; CHECK-NEXT: adc.w r1, r12, r3
|
||||
; CHECK-NEXT: adds r6, r1, r3
|
||||
; CHECK-NEXT: vmov r3, s8
|
||||
; CHECK-NEXT: asr.w r4, r1, #31
|
||||
; CHECK-NEXT: adc.w r1, r4, lr
|
||||
; CHECK-NEXT: asrl r6, r1, r3
|
||||
; CHECK-NEXT: vmov r5, r4, d1
|
||||
; CHECK-NEXT: vmov r1, s14
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r6, r2
|
||||
; CHECK-NEXT: adds r0, r0, r1
|
||||
; CHECK-NEXT: asr.w r3, r1, #31
|
||||
; CHECK-NEXT: adc.w r1, r3, r12
|
||||
; CHECK-NEXT: vmov r3, s18
|
||||
; CHECK-NEXT: asrl r2, r1, r3
|
||||
; CHECK-NEXT: vmov r3, s15
|
||||
; CHECK-NEXT: asrs r1, r0, #31
|
||||
; CHECK-NEXT: adds r0, r0, r4
|
||||
; CHECK-NEXT: adcs r1, r3
|
||||
; CHECK-NEXT: vmov r3, s10
|
||||
; CHECK-NEXT: asrl r0, r1, r3
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r0, r2
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
|
||||
; CHECK-NEXT: pop {r4, pc}
|
||||
; CHECK-NEXT: vmov r1, s6
|
||||
; CHECK-NEXT: adds r6, r1, r5
|
||||
; CHECK-NEXT: asr.w r2, r1, #31
|
||||
; CHECK-NEXT: adc.w r1, r2, r4
|
||||
; CHECK-NEXT: vmov r2, s10
|
||||
; CHECK-NEXT: asrl r6, r1, r2
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r6, r0
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: pop {r4, r5, r6, pc}
|
||||
entry:
|
||||
%a = load <4 x i32>, <4 x i32> *%A, align 4
|
||||
%b = load <4 x i32>, <4 x i32> *%B, align 4
|
||||
|
@ -142,63 +138,62 @@ entry:
|
|||
define arm_aapcs_vfpcc void @load_store_i32(<4 x i32> *%A, <4 x i32> *%B, <4 x i32> *%C, <4 x i32> *%D) {
|
||||
; CHECK-LABEL: load_store_i32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, lr}
|
||||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: .pad #4
|
||||
; CHECK-NEXT: sub sp, #4
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r1]
|
||||
; CHECK-NEXT: vmov.i64 q0, #0xffffffff
|
||||
; CHECK-NEXT: vldrw.u32 q5, [r2]
|
||||
; CHECK-NEXT: vldrw.u32 q6, [r1]
|
||||
; CHECK-NEXT: vmov.f32 s4, s10
|
||||
; CHECK-NEXT: vmov.f32 s6, s11
|
||||
; CHECK-NEXT: vmov.f32 s10, s9
|
||||
; CHECK-NEXT: vand q1, q1, q0
|
||||
; CHECK-NEXT: vand q2, q2, q0
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vmov.i64 q3, #0xffffffff
|
||||
; CHECK-NEXT: vmov.f32 s4, s20
|
||||
; CHECK-NEXT: vmov r5, r1, d2
|
||||
; CHECK-NEXT: vmov.f32 s12, s2
|
||||
; CHECK-NEXT: vmov.f32 s14, s3
|
||||
; CHECK-NEXT: vmov r6, s12
|
||||
; CHECK-NEXT: vmov.f32 s16, s22
|
||||
; CHECK-NEXT: vmov.f32 s6, s21
|
||||
; CHECK-NEXT: vmov.f32 s18, s23
|
||||
; CHECK-NEXT: vmov.f32 s20, s26
|
||||
; CHECK-NEXT: vmov.f32 s22, s27
|
||||
; CHECK-NEXT: vmov.f32 s8, s2
|
||||
; CHECK-NEXT: vand q5, q5, q3
|
||||
; CHECK-NEXT: vmov.f32 s10, s3
|
||||
; CHECK-NEXT: vmov r0, s8
|
||||
; CHECK-NEXT: vmov r2, s20
|
||||
; CHECK-NEXT: vmov r1, s21
|
||||
; CHECK-NEXT: vmov.f32 s26, s25
|
||||
; CHECK-NEXT: vand q3, q6, q3
|
||||
; CHECK-NEXT: vmov r4, lr, d4
|
||||
; CHECK-NEXT: vmov.f32 s2, s1
|
||||
; CHECK-NEXT: vmov lr, s13
|
||||
; CHECK-NEXT: asr.w r12, r0, #31
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adc.w r1, r1, r12
|
||||
; CHECK-NEXT: vmov r2, s16
|
||||
; CHECK-NEXT: asrl r0, r1, r2
|
||||
; CHECK-NEXT: vmov r2, s12
|
||||
; CHECK-NEXT: vmov r0, r12, d3
|
||||
; CHECK-NEXT: vmov.f32 s4, s20
|
||||
; CHECK-NEXT: vmov.f32 s6, s21
|
||||
; CHECK-NEXT: adds r2, r6, r5
|
||||
; CHECK-NEXT: vmov r5, s16
|
||||
; CHECK-NEXT: asr.w r7, r6, #31
|
||||
; CHECK-NEXT: adcs r1, r7
|
||||
; CHECK-NEXT: asrl r2, r1, r5
|
||||
; CHECK-NEXT: vmov r7, s4
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: adds r4, r1, r2
|
||||
; CHECK-NEXT: vmov r2, s4
|
||||
; CHECK-NEXT: asr.w r12, r1, #31
|
||||
; CHECK-NEXT: adc.w r1, r12, lr
|
||||
; CHECK-NEXT: asrl r4, r1, r2
|
||||
; CHECK-NEXT: vmov r2, s22
|
||||
; CHECK-NEXT: vmov r1, s10
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r4, r0
|
||||
; CHECK-NEXT: vmov lr, s23
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: vmov r4, s15
|
||||
; CHECK-NEXT: adds r2, r2, r1
|
||||
; CHECK-NEXT: asr.w r12, r1, #31
|
||||
; CHECK-NEXT: adc.w r1, r12, lr
|
||||
; CHECK-NEXT: vmov r12, s18
|
||||
; CHECK-NEXT: asrl r2, r1, r12
|
||||
; CHECK-NEXT: asr.w r12, r0, #31
|
||||
; CHECK-NEXT: adds r4, r4, r1
|
||||
; CHECK-NEXT: asr.w r5, r1, #31
|
||||
; CHECK-NEXT: adc.w r1, r5, lr
|
||||
; CHECK-NEXT: asrl r4, r1, r7
|
||||
; CHECK-NEXT: vmov r6, r5, d5
|
||||
; CHECK-NEXT: vmov r1, s14
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r4, r2
|
||||
; CHECK-NEXT: adds r0, r0, r1
|
||||
; CHECK-NEXT: adc.w r1, r12, r4
|
||||
; CHECK-NEXT: vmov r4, s6
|
||||
; CHECK-NEXT: asrl r0, r1, r4
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r0, r2
|
||||
; CHECK-NEXT: asr.w r7, r1, #31
|
||||
; CHECK-NEXT: adc.w r1, r7, r12
|
||||
; CHECK-NEXT: vmov r7, s18
|
||||
; CHECK-NEXT: asrl r0, r1, r7
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: adds r6, r6, r1
|
||||
; CHECK-NEXT: asr.w r2, r1, #31
|
||||
; CHECK-NEXT: adc.w r1, r2, r5
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: asrl r6, r1, r2
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r6, r0
|
||||
; CHECK-NEXT: vstrw.32 q2, [r3]
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
|
||||
; CHECK-NEXT: pop {r4, pc}
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: add sp, #4
|
||||
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
|
||||
entry:
|
||||
%a = load <4 x i32>, <4 x i32> *%A, align 4
|
||||
%b = load <4 x i32>, <4 x i32> *%B, align 4
|
||||
|
@ -377,33 +372,31 @@ define arm_aapcs_vfpcc void @mul_i32(<4 x i32> *%A, <4 x i32> *%B, i64 %C, <4 x
|
|||
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r1]
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
||||
; CHECK-NEXT: ldr.w lr, [sp, #20]
|
||||
; CHECK-NEXT: vmov.f32 s8, s0
|
||||
; CHECK-NEXT: vmov.f32 s12, s4
|
||||
; CHECK-NEXT: vmov.f32 s10, s1
|
||||
; CHECK-NEXT: vmov.f32 s14, s5
|
||||
; CHECK-NEXT: vmov.f32 s10, s1
|
||||
; CHECK-NEXT: vmov r5, s12
|
||||
; CHECK-NEXT: vmov r1, s14
|
||||
; CHECK-NEXT: vmov r0, s10
|
||||
; CHECK-NEXT: smull r12, r3, r1, r0
|
||||
; CHECK-NEXT: vmov r0, s8
|
||||
; CHECK-NEXT: vmov r1, s12
|
||||
; CHECK-NEXT: vmov r5, s10
|
||||
; CHECK-NEXT: vmov r4, s14
|
||||
; CHECK-NEXT: vmov.f32 s8, s2
|
||||
; CHECK-NEXT: vmov.f32 s10, s3
|
||||
; CHECK-NEXT: vmov.f32 s0, s6
|
||||
; CHECK-NEXT: asrl r12, r3, r2
|
||||
; CHECK-NEXT: vmov.f32 s2, s7
|
||||
; CHECK-NEXT: vmullb.s32 q1, q0, q2
|
||||
; CHECK-NEXT: vmov r7, s7
|
||||
; CHECK-NEXT: vmov r6, s4
|
||||
; CHECK-NEXT: smull r0, r3, r1, r0
|
||||
; CHECK-NEXT: ldr r1, [sp, #20]
|
||||
; CHECK-NEXT: asrl r0, r3, r2
|
||||
; CHECK-NEXT: smull r12, r5, r4, r5
|
||||
; CHECK-NEXT: vmov r4, s6
|
||||
; CHECK-NEXT: vmov r6, r1, d2
|
||||
; CHECK-NEXT: vmov r4, r7, d3
|
||||
; CHECK-NEXT: asrl r6, r1, r2
|
||||
; CHECK-NEXT: asrl r4, r7, r2
|
||||
; CHECK-NEXT: vmov r7, s5
|
||||
; CHECK-NEXT: asrl r6, r7, r2
|
||||
; CHECK-NEXT: asrl r12, r5, r2
|
||||
; CHECK-NEXT: smull r0, r5, r5, r0
|
||||
; CHECK-NEXT: asrl r0, r5, r2
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, r6
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r12, r4
|
||||
; CHECK-NEXT: vstrw.32 q0, [r1]
|
||||
; CHECK-NEXT: vstrw.32 q0, [lr]
|
||||
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
|
||||
entry:
|
||||
%a = load <4 x i32>, <4 x i32> *%A, align 4
|
||||
|
|
|
@ -180,52 +180,45 @@ entry:
|
|||
define arm_aapcs_vfpcc <4 x i32> @ext_add_ashr_trunc_i32(<4 x i32> %a, <4 x i32> %b) {
|
||||
; CHECK-LABEL: ext_add_ashr_trunc_i32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, lr}
|
||||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vmov.f32 s16, s6
|
||||
; CHECK-NEXT: vmov.i64 q3, #0xffffffff
|
||||
; CHECK-NEXT: vmov.f32 s18, s7
|
||||
; CHECK-NEXT: vmov.f32 s8, s2
|
||||
; CHECK-NEXT: vand q4, q4, q3
|
||||
; CHECK-NEXT: vmov.f32 s10, s3
|
||||
; CHECK-NEXT: vmov r0, s8
|
||||
; CHECK-NEXT: vmov r3, s16
|
||||
; CHECK-NEXT: vmov r2, s17
|
||||
; CHECK-NEXT: .save {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: vmov.f32 s12, s6
|
||||
; CHECK-NEXT: vmov.i64 q2, #0xffffffff
|
||||
; CHECK-NEXT: vmov.f32 s14, s7
|
||||
; CHECK-NEXT: vmov.f32 s6, s5
|
||||
; CHECK-NEXT: vand q3, q1, q3
|
||||
; CHECK-NEXT: vand q3, q3, q2
|
||||
; CHECK-NEXT: vand q1, q1, q2
|
||||
; CHECK-NEXT: vmov.f32 s8, s2
|
||||
; CHECK-NEXT: vmov.f32 s10, s3
|
||||
; CHECK-NEXT: vmov r4, s8
|
||||
; CHECK-NEXT: vmov r0, r1, d6
|
||||
; CHECK-NEXT: vmov r2, r3, d2
|
||||
; CHECK-NEXT: vmov.f32 s2, s1
|
||||
; CHECK-NEXT: vmov lr, s19
|
||||
; CHECK-NEXT: asrs r1, r0, #31
|
||||
; CHECK-NEXT: adds r0, r0, r3
|
||||
; CHECK-NEXT: adcs r1, r2
|
||||
; CHECK-NEXT: vmov r2, s12
|
||||
; CHECK-NEXT: vmov r12, lr, d7
|
||||
; CHECK-NEXT: adds r0, r0, r4
|
||||
; CHECK-NEXT: asr.w r5, r4, #31
|
||||
; CHECK-NEXT: adcs r1, r5
|
||||
; CHECK-NEXT: lsrl r0, r1, #1
|
||||
; CHECK-NEXT: vmov r3, s13
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: adds r2, r2, r1
|
||||
; CHECK-NEXT: asr.w r12, r1, #31
|
||||
; CHECK-NEXT: adc.w r1, r12, r3
|
||||
; CHECK-NEXT: lsrl r2, r1, #1
|
||||
; CHECK-NEXT: vmov r3, s18
|
||||
; CHECK-NEXT: vmov r1, s10
|
||||
; CHECK-NEXT: asr.w r4, r1, #31
|
||||
; CHECK-NEXT: adcs r3, r4
|
||||
; CHECK-NEXT: lsrl r2, r3, #1
|
||||
; CHECK-NEXT: vmov r1, r5, d3
|
||||
; CHECK-NEXT: vmov r3, s10
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r2, r0
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: vmov r2, s15
|
||||
; CHECK-NEXT: adds r4, r1, r3
|
||||
; CHECK-NEXT: vmov r3, s14
|
||||
; CHECK-NEXT: asr.w r12, r1, #31
|
||||
; CHECK-NEXT: adc.w r1, r12, lr
|
||||
; CHECK-NEXT: lsrl r4, r1, #1
|
||||
; CHECK-NEXT: asrs r1, r0, #31
|
||||
; CHECK-NEXT: adds r0, r0, r3
|
||||
; CHECK-NEXT: adcs r1, r2
|
||||
; CHECK-NEXT: adds.w r4, r3, r12
|
||||
; CHECK-NEXT: asr.w r6, r3, #31
|
||||
; CHECK-NEXT: adc.w r3, r6, lr
|
||||
; CHECK-NEXT: asrs r2, r0, #31
|
||||
; CHECK-NEXT: adds r0, r0, r1
|
||||
; CHECK-NEXT: adc.w r1, r2, r5
|
||||
; CHECK-NEXT: lsrl r4, r3, #1
|
||||
; CHECK-NEXT: lsrl r0, r1, #1
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r0, r4
|
||||
; CHECK-NEXT: vmov q0, q1
|
||||
; CHECK-NEXT: vpop {d8, d9}
|
||||
; CHECK-NEXT: pop {r4, pc}
|
||||
; CHECK-NEXT: pop {r4, r5, r6, pc}
|
||||
entry:
|
||||
%sa = sext <4 x i32> %a to <4 x i64>
|
||||
%sb = zext <4 x i32> %b to <4 x i64>
|
||||
|
@ -282,112 +275,105 @@ entry:
|
|||
define arm_aapcs_vfpcc <16 x i8> @ext_add_ashr_trunc_i8i32(<16 x i8> %a, <16 x i8> %b) {
|
||||
; CHECK-LABEL: ext_add_ashr_trunc_i8i32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
|
||||
; CHECK-NEXT: vmov.u8 r0, q1[14]
|
||||
; CHECK-NEXT: vmov.u8 r1, q1[12]
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r1, r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q1[15]
|
||||
; CHECK-NEXT: vmov.u8 r1, q1[13]
|
||||
; CHECK-NEXT: vmov.i32 q2, #0xff
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r1, r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[14]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[12]
|
||||
; CHECK-NEXT: vand q3, q3, q2
|
||||
; CHECK-NEXT: vmov q4[2], q4[0], r1, r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[15]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[13]
|
||||
; CHECK-NEXT: vmov.u8 r4, q1[6]
|
||||
; CHECK-NEXT: vmov q4[3], q4[1], r1, r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q1[2]
|
||||
; CHECK-NEXT: vmovlb.s8 q4, q4
|
||||
; CHECK-NEXT: vmov.u8 r1, q1[0]
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
|
||||
; CHECK-NEXT: vmovlb.s16 q4, q4
|
||||
; CHECK-NEXT: vmov.u8 r5, q1[4]
|
||||
; CHECK-NEXT: vadd.i32 q3, q4, q3
|
||||
; CHECK-NEXT: vshr.u32 q3, q3, #1
|
||||
; CHECK-NEXT: vmov lr, r12, d7
|
||||
; CHECK-NEXT: vmov r3, r2, d6
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r1, r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q1[3]
|
||||
; CHECK-NEXT: vmov.u8 r1, q1[1]
|
||||
; CHECK-NEXT: vmov.i32 q3, #0xff
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r1, r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[2]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[0]
|
||||
; CHECK-NEXT: vand q2, q2, q3
|
||||
; CHECK-NEXT: vand q3, q3, q2
|
||||
; CHECK-NEXT: vmov q4[2], q4[0], r1, r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[3]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[1]
|
||||
; CHECK-NEXT: vmov q4[3], q4[1], r1, r0
|
||||
; CHECK-NEXT: vmov.u8 r1, q1[4]
|
||||
; CHECK-NEXT: vmovlb.s8 q4, q4
|
||||
; CHECK-NEXT: vmovlb.s16 q4, q4
|
||||
; CHECK-NEXT: vadd.i32 q2, q4, q2
|
||||
; CHECK-NEXT: vshr.u32 q4, q2, #1
|
||||
; CHECK-NEXT: vmov r0, s16
|
||||
; CHECK-NEXT: vmov.8 q2[0], r0
|
||||
; CHECK-NEXT: vmov r0, s17
|
||||
; CHECK-NEXT: vmov.8 q2[1], r0
|
||||
; CHECK-NEXT: vmov r0, s18
|
||||
; CHECK-NEXT: vmov.8 q2[2], r0
|
||||
; CHECK-NEXT: vmov r0, s19
|
||||
; CHECK-NEXT: vmov.8 q2[3], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q1[6]
|
||||
; CHECK-NEXT: vmov q4[2], q4[0], r1, r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q1[7]
|
||||
; CHECK-NEXT: vmov.u8 r1, q1[5]
|
||||
; CHECK-NEXT: vmov q4[3], q4[1], r1, r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[6]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[4]
|
||||
; CHECK-NEXT: vand q4, q4, q3
|
||||
; CHECK-NEXT: vmov q5[2], q5[0], r1, r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[7]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[5]
|
||||
; CHECK-NEXT: vmov q5[3], q5[1], r1, r0
|
||||
; CHECK-NEXT: vmov.u8 r1, q1[8]
|
||||
; CHECK-NEXT: vadd.i32 q3, q4, q3
|
||||
; CHECK-NEXT: vmov q4[2], q4[0], r5, r4
|
||||
; CHECK-NEXT: vmov.u8 r4, q1[7]
|
||||
; CHECK-NEXT: vmov.u8 r5, q1[5]
|
||||
; CHECK-NEXT: vmov q4[3], q4[1], r5, r4
|
||||
; CHECK-NEXT: vmov.u8 r4, q0[6]
|
||||
; CHECK-NEXT: vmov.u8 r5, q0[4]
|
||||
; CHECK-NEXT: vshr.u32 q3, q3, #1
|
||||
; CHECK-NEXT: vmov q5[2], q5[0], r5, r4
|
||||
; CHECK-NEXT: vmov.u8 r4, q0[7]
|
||||
; CHECK-NEXT: vmov.u8 r5, q0[5]
|
||||
; CHECK-NEXT: vand q4, q4, q2
|
||||
; CHECK-NEXT: vmov q5[3], q5[1], r5, r4
|
||||
; CHECK-NEXT: vmov.u8 r4, q0[10]
|
||||
; CHECK-NEXT: vmovlb.s8 q5, q5
|
||||
; CHECK-NEXT: vmov.u8 r5, q0[8]
|
||||
; CHECK-NEXT: vmovlb.s16 q5, q5
|
||||
; CHECK-NEXT: vmov r1, r0, d6
|
||||
; CHECK-NEXT: vadd.i32 q4, q5, q4
|
||||
; CHECK-NEXT: vmov q5[2], q5[0], r5, r4
|
||||
; CHECK-NEXT: vmov.u8 r4, q0[11]
|
||||
; CHECK-NEXT: vmov.u8 r5, q0[9]
|
||||
; CHECK-NEXT: vmov q5[3], q5[1], r5, r4
|
||||
; CHECK-NEXT: vmov.8 q0[0], r1
|
||||
; CHECK-NEXT: vmov.u8 r4, q1[10]
|
||||
; CHECK-NEXT: vmov.u8 r5, q1[8]
|
||||
; CHECK-NEXT: vmov q6[2], q6[0], r5, r4
|
||||
; CHECK-NEXT: vmov.8 q0[1], r0
|
||||
; CHECK-NEXT: vmov r0, r1, d7
|
||||
; CHECK-NEXT: vmov.u8 r4, q1[11]
|
||||
; CHECK-NEXT: vmov.u8 r5, q1[9]
|
||||
; CHECK-NEXT: vmov.8 q0[2], r0
|
||||
; CHECK-NEXT: vmov q6[3], q6[1], r5, r4
|
||||
; CHECK-NEXT: vshr.u32 q4, q4, #1
|
||||
; CHECK-NEXT: vmov r0, s16
|
||||
; CHECK-NEXT: vmov.8 q2[4], r0
|
||||
; CHECK-NEXT: vmov r0, s17
|
||||
; CHECK-NEXT: vmov.8 q2[5], r0
|
||||
; CHECK-NEXT: vmov r0, s18
|
||||
; CHECK-NEXT: vmov.8 q2[6], r0
|
||||
; CHECK-NEXT: vmov r0, s19
|
||||
; CHECK-NEXT: vmov.8 q2[7], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q1[10]
|
||||
; CHECK-NEXT: vmov q4[2], q4[0], r1, r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q1[11]
|
||||
; CHECK-NEXT: vmov.u8 r1, q1[9]
|
||||
; CHECK-NEXT: vmov q4[3], q4[1], r1, r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[10]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[8]
|
||||
; CHECK-NEXT: vand q4, q4, q3
|
||||
; CHECK-NEXT: vmov q5[2], q5[0], r1, r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[11]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[9]
|
||||
; CHECK-NEXT: vmov q5[3], q5[1], r1, r0
|
||||
; CHECK-NEXT: vmov.u8 r1, q1[12]
|
||||
; CHECK-NEXT: vmovlb.s8 q5, q5
|
||||
; CHECK-NEXT: vmovlb.s16 q5, q5
|
||||
; CHECK-NEXT: vadd.i32 q4, q5, q4
|
||||
; CHECK-NEXT: vshr.u32 q4, q4, #1
|
||||
; CHECK-NEXT: vmov r0, s16
|
||||
; CHECK-NEXT: vmov.8 q2[8], r0
|
||||
; CHECK-NEXT: vmov r0, s17
|
||||
; CHECK-NEXT: vmov.8 q2[9], r0
|
||||
; CHECK-NEXT: vmov r0, s18
|
||||
; CHECK-NEXT: vmov.8 q2[10], r0
|
||||
; CHECK-NEXT: vmov r0, s19
|
||||
; CHECK-NEXT: vmov.8 q2[11], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q1[14]
|
||||
; CHECK-NEXT: vmov q4[2], q4[0], r1, r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q1[15]
|
||||
; CHECK-NEXT: vmov.u8 r1, q1[13]
|
||||
; CHECK-NEXT: vmov q4[3], q4[1], r1, r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[14]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[12]
|
||||
; CHECK-NEXT: vand q1, q4, q3
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r1, r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[15]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[13]
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r1, r0
|
||||
; CHECK-NEXT: vmovlb.s8 q0, q3
|
||||
; CHECK-NEXT: vmovlb.s16 q0, q0
|
||||
; CHECK-NEXT: vadd.i32 q0, q0, q1
|
||||
; CHECK-NEXT: vshr.u32 q0, q0, #1
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: vmov.8 q2[12], r0
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: vmov.8 q2[13], r0
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: vmov.8 q2[14], r0
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov.8 q2[15], r0
|
||||
; CHECK-NEXT: vmov q0, q2
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: vmov.8 q0[3], r1
|
||||
; CHECK-NEXT: vmov r0, r1, d8
|
||||
; CHECK-NEXT: vand q1, q6, q2
|
||||
; CHECK-NEXT: vmovlb.s8 q2, q5
|
||||
; CHECK-NEXT: vmov.8 q0[4], r0
|
||||
; CHECK-NEXT: vmovlb.s16 q2, q2
|
||||
; CHECK-NEXT: vadd.i32 q1, q2, q1
|
||||
; CHECK-NEXT: vmov r4, r5, d9
|
||||
; CHECK-NEXT: vmov.8 q0[5], r1
|
||||
; CHECK-NEXT: vshr.u32 q1, q1, #1
|
||||
; CHECK-NEXT: vmov.8 q0[6], r4
|
||||
; CHECK-NEXT: vmov r0, r1, d3
|
||||
; CHECK-NEXT: vmov.8 q0[7], r5
|
||||
; CHECK-NEXT: vmov r4, r5, d2
|
||||
; CHECK-NEXT: vmov.8 q0[8], r4
|
||||
; CHECK-NEXT: vmov.8 q0[9], r5
|
||||
; CHECK-NEXT: vmov.8 q0[10], r0
|
||||
; CHECK-NEXT: vmov.8 q0[11], r1
|
||||
; CHECK-NEXT: vmov.8 q0[12], r3
|
||||
; CHECK-NEXT: vmov.8 q0[13], r2
|
||||
; CHECK-NEXT: vmov.8 q0[14], lr
|
||||
; CHECK-NEXT: vmov.8 q0[15], r12
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
entry:
|
||||
%sa = sext <16 x i8> %a to <16 x i32>
|
||||
%sb = zext <16 x i8> %b to <16 x i32>
|
||||
|
@ -406,114 +392,115 @@ define arm_aapcs_vfpcc <4 x i32> @ext_ops_trunc_i32(<4 x i32> %a, <4 x i32> %b)
|
|||
; CHECK-NEXT: sub sp, #4
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: .pad #8
|
||||
; CHECK-NEXT: sub sp, #8
|
||||
; CHECK-NEXT: vmov.f32 s8, s6
|
||||
; CHECK-NEXT: vmov.i64 q4, #0xffffffff
|
||||
; CHECK-NEXT: vmov.i64 q3, #0xffffffff
|
||||
; CHECK-NEXT: vmov.f32 s10, s7
|
||||
; CHECK-NEXT: vmov.f32 s12, s2
|
||||
; CHECK-NEXT: vand q2, q2, q4
|
||||
; CHECK-NEXT: vmov.f32 s14, s3
|
||||
; CHECK-NEXT: vmov r10, s12
|
||||
; CHECK-NEXT: vmov r4, s8
|
||||
; CHECK-NEXT: vmov r1, s9
|
||||
; CHECK-NEXT: vmov.f32 s6, s5
|
||||
; CHECK-NEXT: vand q1, q1, q4
|
||||
; CHECK-NEXT: vand q2, q2, q3
|
||||
; CHECK-NEXT: vand q1, q1, q3
|
||||
; CHECK-NEXT: vmov.f32 s12, s2
|
||||
; CHECK-NEXT: vmov.f32 s14, s3
|
||||
; CHECK-NEXT: vmov.f32 s2, s1
|
||||
; CHECK-NEXT: vmov r9, s0
|
||||
; CHECK-NEXT: vmov r11, s4
|
||||
; CHECK-NEXT: vmov r7, s5
|
||||
; CHECK-NEXT: adds.w r2, r10, r4
|
||||
; CHECK-NEXT: asr.w r0, r10, #31
|
||||
; CHECK-NEXT: adc.w r5, r0, r1
|
||||
; CHECK-NEXT: asrl r2, r5, r4
|
||||
; CHECK-NEXT: subs r6, r2, r4
|
||||
; CHECK-NEXT: sbc.w r12, r5, r1
|
||||
; CHECK-NEXT: adds.w r0, r9, r11
|
||||
; CHECK-NEXT: asr.w r2, r9, #31
|
||||
; CHECK-NEXT: adc.w r3, r2, r7
|
||||
; CHECK-NEXT: umull r8, r5, r6, r4
|
||||
; CHECK-NEXT: asrl r0, r3, r11
|
||||
; CHECK-NEXT: subs.w r0, r0, r11
|
||||
; CHECK-NEXT: mla r5, r12, r4, r5
|
||||
; CHECK-NEXT: sbc.w r12, r3, r7
|
||||
; CHECK-NEXT: umull r2, r1, r0, r11
|
||||
; CHECK-NEXT: muls r0, r7, r0
|
||||
; CHECK-NEXT: vmov r7, s14
|
||||
; CHECK-NEXT: orr.w lr, r1, r0
|
||||
; CHECK-NEXT: rsb.w r0, r10, #0
|
||||
; CHECK-NEXT: lsll r8, r5, r0
|
||||
; CHECK-NEXT: vmov r0, s10
|
||||
; CHECK-NEXT: vmov r1, s11
|
||||
; CHECK-NEXT: lsll r8, r5, r4
|
||||
; CHECK-NEXT: vmov r5, s6
|
||||
; CHECK-NEXT: eor.w r4, r4, r10
|
||||
; CHECK-NEXT: orr.w r4, r4, r10, asr #31
|
||||
; CHECK-NEXT: asrs r3, r7, #31
|
||||
; CHECK-NEXT: adds r6, r7, r0
|
||||
; CHECK-NEXT: adcs r3, r1
|
||||
; CHECK-NEXT: asrl r6, r3, r0
|
||||
; CHECK-NEXT: subs r6, r6, r0
|
||||
; CHECK-NEXT: sbc.w r1, r3, r1
|
||||
; CHECK-NEXT: umull r6, r3, r6, r0
|
||||
; CHECK-NEXT: mla r1, r1, r0, r3
|
||||
; CHECK-NEXT: rsbs r3, r7, #0
|
||||
; CHECK-NEXT: lsll r6, r1, r3
|
||||
; CHECK-NEXT: lsll r6, r1, r0
|
||||
; CHECK-NEXT: eors r0, r7
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r8, r6
|
||||
; CHECK-NEXT: vmov r6, s2
|
||||
; CHECK-NEXT: vmov r1, s7
|
||||
; CHECK-NEXT: orr.w r0, r0, r7, asr #31
|
||||
; CHECK-NEXT: adds.w r8, r6, r5
|
||||
; CHECK-NEXT: eor.w r7, r6, r5
|
||||
; CHECK-NEXT: asr.w r3, r6, #31
|
||||
; CHECK-NEXT: orr.w r7, r7, r6, asr #31
|
||||
; CHECK-NEXT: adcs r3, r1
|
||||
; CHECK-NEXT: asrl r8, r3, r5
|
||||
; CHECK-NEXT: subs.w r8, r8, r5
|
||||
; CHECK-NEXT: sbcs r3, r1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: mul r1, r8, r1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov r12, r2, d5
|
||||
; CHECK-NEXT: vmov r8, r9, d3
|
||||
; CHECK-NEXT: vmov r1, s14
|
||||
; CHECK-NEXT: vmov lr, s2
|
||||
; CHECK-NEXT: str r1, [sp, #4] @ 4-byte Spill
|
||||
; CHECK-NEXT: adds.w r4, r1, r12
|
||||
; CHECK-NEXT: asr.w r0, r1, #31
|
||||
; CHECK-NEXT: adc.w r5, r0, r2
|
||||
; CHECK-NEXT: asrl r4, r5, r12
|
||||
; CHECK-NEXT: subs.w r0, r4, r12
|
||||
; CHECK-NEXT: sbc.w r2, r5, r2
|
||||
; CHECK-NEXT: asr.w r5, lr, #31
|
||||
; CHECK-NEXT: umull r0, r4, r0, r12
|
||||
; CHECK-NEXT: adds.w r6, lr, r8
|
||||
; CHECK-NEXT: adc.w r5, r5, r9
|
||||
; CHECK-NEXT: asrl r6, r5, r8
|
||||
; CHECK-NEXT: mla r3, r2, r12, r4
|
||||
; CHECK-NEXT: subs.w r7, r6, r8
|
||||
; CHECK-NEXT: sbc.w r10, r5, r9
|
||||
; CHECK-NEXT: rsbs r2, r1, #0
|
||||
; CHECK-NEXT: vmov r5, s12
|
||||
; CHECK-NEXT: lsll r0, r3, r2
|
||||
; CHECK-NEXT: vmov r6, r2, d4
|
||||
; CHECK-NEXT: lsll r0, r3, r12
|
||||
; CHECK-NEXT: asrs r3, r5, #31
|
||||
; CHECK-NEXT: adds r4, r5, r6
|
||||
; CHECK-NEXT: adcs r3, r2
|
||||
; CHECK-NEXT: asrl r4, r3, r6
|
||||
; CHECK-NEXT: subs r4, r4, r6
|
||||
; CHECK-NEXT: sbc.w r2, r3, r2
|
||||
; CHECK-NEXT: umull r4, r3, r4, r6
|
||||
; CHECK-NEXT: mla r3, r2, r6, r3
|
||||
; CHECK-NEXT: rsbs r2, r5, #0
|
||||
; CHECK-NEXT: lsll r4, r3, r2
|
||||
; CHECK-NEXT: lsll r4, r3, r6
|
||||
; CHECK-NEXT: eors r6, r5
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r4, r0
|
||||
; CHECK-NEXT: umull r2, r0, r7, r8
|
||||
; CHECK-NEXT: orr.w r6, r6, r5, asr #31
|
||||
; CHECK-NEXT: mul r3, r7, r9
|
||||
; CHECK-NEXT: vmov r7, s0
|
||||
; CHECK-NEXT: orrs r0, r3
|
||||
; CHECK-NEXT: vmov r3, r4, d2
|
||||
; CHECK-NEXT: mla r11, r10, r8, r0
|
||||
; CHECK-NEXT: asr.w r9, r7, #31
|
||||
; CHECK-NEXT: adds r0, r7, r3
|
||||
; CHECK-NEXT: adc.w r9, r9, r4
|
||||
; CHECK-NEXT: asrl r0, r9, r3
|
||||
; CHECK-NEXT: subs.w r10, r0, r3
|
||||
; CHECK-NEXT: sbc.w r9, r9, r4
|
||||
; CHECK-NEXT: umull r0, r1, r10, r3
|
||||
; CHECK-NEXT: mul r4, r10, r4
|
||||
; CHECK-NEXT: orr.w r10, r1, r4
|
||||
; CHECK-NEXT: eor.w r1, lr, r8
|
||||
; CHECK-NEXT: orr.w r1, r1, lr, asr #31
|
||||
; CHECK-NEXT: eor.w r4, r7, r3
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: orr.w r4, r4, r7, asr #31
|
||||
; CHECK-NEXT: cset r1, eq
|
||||
; CHECK-NEXT: rsbs r7, r7, #0
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: cmp r4, #0
|
||||
; CHECK-NEXT: cset r4, eq
|
||||
; CHECK-NEXT: cmp r4, #0
|
||||
; CHECK-NEXT: csetm r4, ne
|
||||
; CHECK-NEXT: vmov.32 q4[1], r4
|
||||
; CHECK-NEXT: vmov q4[2], q4[0], r4, r0
|
||||
; CHECK-NEXT: umull r4, r0, r8, r5
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: eor.w r1, r9, r11
|
||||
; CHECK-NEXT: orr.w r1, r1, r9, asr #31
|
||||
; CHECK-NEXT: vmov.32 q0[1], r4
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r4, r1
|
||||
; CHECK-NEXT: ldr r4, [sp, #4] @ 4-byte Reload
|
||||
; CHECK-NEXT: vbic q4, q1, q0
|
||||
; CHECK-NEXT: eor.w r1, r4, r12
|
||||
; CHECK-NEXT: orr.w r1, r1, r4, asr #31
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: cset r1, eq
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: cmp r7, #0
|
||||
; CHECK-NEXT: cset r7, eq
|
||||
; CHECK-NEXT: vmov.32 q0[1], r1
|
||||
; CHECK-NEXT: cmp r7, #0
|
||||
; CHECK-NEXT: csetm r7, ne
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r1, r7
|
||||
; CHECK-NEXT: mla r7, r3, r5, r0
|
||||
; CHECK-NEXT: rsbs r1, r6, #0
|
||||
; CHECK-NEXT: vbic q5, q1, q0
|
||||
; CHECK-NEXT: mla r3, r12, r11, lr
|
||||
; CHECK-NEXT: rsb.w r0, r9, #0
|
||||
; CHECK-NEXT: lsll r4, r7, r1
|
||||
; CHECK-NEXT: vbic q1, q2, q4
|
||||
; CHECK-NEXT: lsll r2, r3, r0
|
||||
; CHECK-NEXT: vand q2, q3, q4
|
||||
; CHECK-NEXT: lsll r4, r7, r5
|
||||
; CHECK-NEXT: lsll r2, r3, r11
|
||||
; CHECK-NEXT: cmp r6, #0
|
||||
; CHECK-NEXT: cset r6, eq
|
||||
; CHECK-NEXT: cmp r6, #0
|
||||
; CHECK-NEXT: csetm r6, ne
|
||||
; CHECK-NEXT: vmov.32 q5[1], r6
|
||||
; CHECK-NEXT: vmov q5[2], q5[0], r6, r1
|
||||
; CHECK-NEXT: mla r1, r9, r3, r10
|
||||
; CHECK-NEXT: rsb.w r6, lr, #0
|
||||
; CHECK-NEXT: vbic q1, q2, q5
|
||||
; CHECK-NEXT: lsll r2, r11, r6
|
||||
; CHECK-NEXT: lsll r0, r1, r7
|
||||
; CHECK-NEXT: vand q2, q3, q5
|
||||
; CHECK-NEXT: lsll r2, r11, r8
|
||||
; CHECK-NEXT: lsll r0, r1, r3
|
||||
; CHECK-NEXT: vorr q1, q2, q1
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r2, r4
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r0, r2
|
||||
; CHECK-NEXT: vand q0, q2, q0
|
||||
; CHECK-NEXT: vorr q0, q0, q5
|
||||
; CHECK-NEXT: vorr q0, q0, q4
|
||||
; CHECK-NEXT: vmov.f32 s1, s2
|
||||
; CHECK-NEXT: vmov.f32 s2, s4
|
||||
; CHECK-NEXT: vmov.f32 s3, s6
|
||||
; CHECK-NEXT: add sp, #8
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: add sp, #4
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||
|
|
|
@ -1750,28 +1750,28 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @masked_v2i64_align4_zero(<2 x i64> *%dest, <2 x i64> %a) {
|
||||
; CHECK-LE-LABEL: masked_v2i64_align4_zero:
|
||||
; CHECK-LE: @ %bb.0: @ %entry
|
||||
; CHECK-LE-NEXT: .save {r7, lr}
|
||||
; CHECK-LE-NEXT: push {r7, lr}
|
||||
; CHECK-LE-NEXT: .pad #4
|
||||
; CHECK-LE-NEXT: sub sp, #4
|
||||
; CHECK-LE-NEXT: vmov r3, s0
|
||||
; CHECK-LE-NEXT: movs r2, #0
|
||||
; CHECK-LE-NEXT: vmov r1, s1
|
||||
; CHECK-LE-NEXT: vmov r12, s3
|
||||
; CHECK-LE-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-LE-NEXT: vmov r3, s2
|
||||
; CHECK-LE-NEXT: sbcs.w r1, r2, r1
|
||||
; CHECK-LE-NEXT: vmov r1, r2, d0
|
||||
; CHECK-LE-NEXT: movs r3, #0
|
||||
; CHECK-LE-NEXT: vmov lr, r12, d1
|
||||
; CHECK-LE-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-LE-NEXT: sbcs.w r1, r3, r2
|
||||
; CHECK-LE-NEXT: mov.w r1, #0
|
||||
; CHECK-LE-NEXT: it lt
|
||||
; CHECK-LE-NEXT: movlt r1, #1
|
||||
; CHECK-LE-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-LE-NEXT: sbcs.w r3, r2, r12
|
||||
; CHECK-LE-NEXT: rsbs.w r2, lr, #0
|
||||
; CHECK-LE-NEXT: sbcs.w r2, r3, r12
|
||||
; CHECK-LE-NEXT: it lt
|
||||
; CHECK-LE-NEXT: movlt r2, #1
|
||||
; CHECK-LE-NEXT: cmp r2, #0
|
||||
; CHECK-LE-NEXT: movlt r3, #1
|
||||
; CHECK-LE-NEXT: cmp r3, #0
|
||||
; CHECK-LE-NEXT: it ne
|
||||
; CHECK-LE-NEXT: mvnne r2, #1
|
||||
; CHECK-LE-NEXT: bfi r2, r1, #0, #1
|
||||
; CHECK-LE-NEXT: and r1, r2, #3
|
||||
; CHECK-LE-NEXT: lsls r2, r2, #31
|
||||
; CHECK-LE-NEXT: mvnne r3, #1
|
||||
; CHECK-LE-NEXT: bfi r3, r1, #0, #1
|
||||
; CHECK-LE-NEXT: and r1, r3, #3
|
||||
; CHECK-LE-NEXT: lsls r2, r3, #31
|
||||
; CHECK-LE-NEXT: beq .LBB49_2
|
||||
; CHECK-LE-NEXT: @ %bb.1: @ %cond.load
|
||||
; CHECK-LE-NEXT: vldr d1, .LCPI49_0
|
||||
|
@ -1784,7 +1784,7 @@ define arm_aapcs_vfpcc <2 x i64> @masked_v2i64_align4_zero(<2 x i64> *%dest, <2
|
|||
; CHECK-LE-NEXT: it mi
|
||||
; CHECK-LE-NEXT: vldrmi d1, [r0, #8]
|
||||
; CHECK-LE-NEXT: add sp, #4
|
||||
; CHECK-LE-NEXT: bx lr
|
||||
; CHECK-LE-NEXT: pop {r7, pc}
|
||||
; CHECK-LE-NEXT: .p2align 3
|
||||
; CHECK-LE-NEXT: @ %bb.4:
|
||||
; CHECK-LE-NEXT: .LCPI49_0:
|
||||
|
@ -1793,29 +1793,29 @@ define arm_aapcs_vfpcc <2 x i64> @masked_v2i64_align4_zero(<2 x i64> *%dest, <2
|
|||
;
|
||||
; CHECK-BE-LABEL: masked_v2i64_align4_zero:
|
||||
; CHECK-BE: @ %bb.0: @ %entry
|
||||
; CHECK-BE-NEXT: .save {r7, lr}
|
||||
; CHECK-BE-NEXT: push {r7, lr}
|
||||
; CHECK-BE-NEXT: .pad #4
|
||||
; CHECK-BE-NEXT: sub sp, #4
|
||||
; CHECK-BE-NEXT: vrev64.32 q1, q0
|
||||
; CHECK-BE-NEXT: movs r2, #0
|
||||
; CHECK-BE-NEXT: vmov r3, s7
|
||||
; CHECK-BE-NEXT: vmov r1, s6
|
||||
; CHECK-BE-NEXT: vmov r12, s4
|
||||
; CHECK-BE-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-BE-NEXT: vmov r3, s5
|
||||
; CHECK-BE-NEXT: sbcs.w r1, r2, r1
|
||||
; CHECK-BE-NEXT: movs r3, #0
|
||||
; CHECK-BE-NEXT: vmov r1, r2, d3
|
||||
; CHECK-BE-NEXT: vmov r12, lr, d2
|
||||
; CHECK-BE-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-BE-NEXT: sbcs.w r1, r3, r1
|
||||
; CHECK-BE-NEXT: mov.w r1, #0
|
||||
; CHECK-BE-NEXT: it lt
|
||||
; CHECK-BE-NEXT: movlt r1, #1
|
||||
; CHECK-BE-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-BE-NEXT: sbcs.w r3, r2, r12
|
||||
; CHECK-BE-NEXT: rsbs.w r2, lr, #0
|
||||
; CHECK-BE-NEXT: sbcs.w r2, r3, r12
|
||||
; CHECK-BE-NEXT: it lt
|
||||
; CHECK-BE-NEXT: movlt r2, #1
|
||||
; CHECK-BE-NEXT: cmp r2, #0
|
||||
; CHECK-BE-NEXT: movlt r3, #1
|
||||
; CHECK-BE-NEXT: cmp r3, #0
|
||||
; CHECK-BE-NEXT: it ne
|
||||
; CHECK-BE-NEXT: mvnne r2, #1
|
||||
; CHECK-BE-NEXT: bfi r2, r1, #0, #1
|
||||
; CHECK-BE-NEXT: and r1, r2, #3
|
||||
; CHECK-BE-NEXT: lsls r2, r2, #30
|
||||
; CHECK-BE-NEXT: mvnne r3, #1
|
||||
; CHECK-BE-NEXT: bfi r3, r1, #0, #1
|
||||
; CHECK-BE-NEXT: and r1, r3, #3
|
||||
; CHECK-BE-NEXT: lsls r2, r3, #30
|
||||
; CHECK-BE-NEXT: bpl .LBB49_2
|
||||
; CHECK-BE-NEXT: @ %bb.1: @ %cond.load
|
||||
; CHECK-BE-NEXT: vldr d1, .LCPI49_0
|
||||
|
@ -1828,7 +1828,7 @@ define arm_aapcs_vfpcc <2 x i64> @masked_v2i64_align4_zero(<2 x i64> *%dest, <2
|
|||
; CHECK-BE-NEXT: it ne
|
||||
; CHECK-BE-NEXT: vldrne d1, [r0, #8]
|
||||
; CHECK-BE-NEXT: add sp, #4
|
||||
; CHECK-BE-NEXT: bx lr
|
||||
; CHECK-BE-NEXT: pop {r7, pc}
|
||||
; CHECK-BE-NEXT: .p2align 3
|
||||
; CHECK-BE-NEXT: @ %bb.4:
|
||||
; CHECK-BE-NEXT: .LCPI49_0:
|
||||
|
@ -1843,28 +1843,28 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x double> @masked_v2f64_align4_zero(<2 x double> *%dest, <2 x double> %a, <2 x i64> %b) {
|
||||
; CHECK-LE-LABEL: masked_v2f64_align4_zero:
|
||||
; CHECK-LE: @ %bb.0: @ %entry
|
||||
; CHECK-LE-NEXT: .save {r7, lr}
|
||||
; CHECK-LE-NEXT: push {r7, lr}
|
||||
; CHECK-LE-NEXT: .pad #4
|
||||
; CHECK-LE-NEXT: sub sp, #4
|
||||
; CHECK-LE-NEXT: vmov r3, s4
|
||||
; CHECK-LE-NEXT: movs r2, #0
|
||||
; CHECK-LE-NEXT: vmov r1, s5
|
||||
; CHECK-LE-NEXT: vmov r12, s7
|
||||
; CHECK-LE-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-LE-NEXT: vmov r3, s6
|
||||
; CHECK-LE-NEXT: sbcs.w r1, r2, r1
|
||||
; CHECK-LE-NEXT: vmov r1, r2, d2
|
||||
; CHECK-LE-NEXT: movs r3, #0
|
||||
; CHECK-LE-NEXT: vmov lr, r12, d3
|
||||
; CHECK-LE-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-LE-NEXT: sbcs.w r1, r3, r2
|
||||
; CHECK-LE-NEXT: mov.w r1, #0
|
||||
; CHECK-LE-NEXT: it lt
|
||||
; CHECK-LE-NEXT: movlt r1, #1
|
||||
; CHECK-LE-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-LE-NEXT: sbcs.w r3, r2, r12
|
||||
; CHECK-LE-NEXT: rsbs.w r2, lr, #0
|
||||
; CHECK-LE-NEXT: sbcs.w r2, r3, r12
|
||||
; CHECK-LE-NEXT: it lt
|
||||
; CHECK-LE-NEXT: movlt r2, #1
|
||||
; CHECK-LE-NEXT: cmp r2, #0
|
||||
; CHECK-LE-NEXT: movlt r3, #1
|
||||
; CHECK-LE-NEXT: cmp r3, #0
|
||||
; CHECK-LE-NEXT: it ne
|
||||
; CHECK-LE-NEXT: mvnne r2, #1
|
||||
; CHECK-LE-NEXT: bfi r2, r1, #0, #1
|
||||
; CHECK-LE-NEXT: and r1, r2, #3
|
||||
; CHECK-LE-NEXT: lsls r2, r2, #31
|
||||
; CHECK-LE-NEXT: mvnne r3, #1
|
||||
; CHECK-LE-NEXT: bfi r3, r1, #0, #1
|
||||
; CHECK-LE-NEXT: and r1, r3, #3
|
||||
; CHECK-LE-NEXT: lsls r2, r3, #31
|
||||
; CHECK-LE-NEXT: beq .LBB50_2
|
||||
; CHECK-LE-NEXT: @ %bb.1: @ %cond.load
|
||||
; CHECK-LE-NEXT: vldr d1, .LCPI50_0
|
||||
|
@ -1877,7 +1877,7 @@ define arm_aapcs_vfpcc <2 x double> @masked_v2f64_align4_zero(<2 x double> *%des
|
|||
; CHECK-LE-NEXT: it mi
|
||||
; CHECK-LE-NEXT: vldrmi d1, [r0, #8]
|
||||
; CHECK-LE-NEXT: add sp, #4
|
||||
; CHECK-LE-NEXT: bx lr
|
||||
; CHECK-LE-NEXT: pop {r7, pc}
|
||||
; CHECK-LE-NEXT: .p2align 3
|
||||
; CHECK-LE-NEXT: @ %bb.4:
|
||||
; CHECK-LE-NEXT: .LCPI50_0:
|
||||
|
@ -1886,29 +1886,29 @@ define arm_aapcs_vfpcc <2 x double> @masked_v2f64_align4_zero(<2 x double> *%des
|
|||
;
|
||||
; CHECK-BE-LABEL: masked_v2f64_align4_zero:
|
||||
; CHECK-BE: @ %bb.0: @ %entry
|
||||
; CHECK-BE-NEXT: .save {r7, lr}
|
||||
; CHECK-BE-NEXT: push {r7, lr}
|
||||
; CHECK-BE-NEXT: .pad #4
|
||||
; CHECK-BE-NEXT: sub sp, #4
|
||||
; CHECK-BE-NEXT: vrev64.32 q0, q1
|
||||
; CHECK-BE-NEXT: movs r2, #0
|
||||
; CHECK-BE-NEXT: vmov r3, s3
|
||||
; CHECK-BE-NEXT: vmov r1, s2
|
||||
; CHECK-BE-NEXT: vmov r12, s0
|
||||
; CHECK-BE-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-BE-NEXT: vmov r3, s1
|
||||
; CHECK-BE-NEXT: sbcs.w r1, r2, r1
|
||||
; CHECK-BE-NEXT: movs r3, #0
|
||||
; CHECK-BE-NEXT: vmov r1, r2, d1
|
||||
; CHECK-BE-NEXT: vmov r12, lr, d0
|
||||
; CHECK-BE-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-BE-NEXT: sbcs.w r1, r3, r1
|
||||
; CHECK-BE-NEXT: mov.w r1, #0
|
||||
; CHECK-BE-NEXT: it lt
|
||||
; CHECK-BE-NEXT: movlt r1, #1
|
||||
; CHECK-BE-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-BE-NEXT: sbcs.w r3, r2, r12
|
||||
; CHECK-BE-NEXT: rsbs.w r2, lr, #0
|
||||
; CHECK-BE-NEXT: sbcs.w r2, r3, r12
|
||||
; CHECK-BE-NEXT: it lt
|
||||
; CHECK-BE-NEXT: movlt r2, #1
|
||||
; CHECK-BE-NEXT: cmp r2, #0
|
||||
; CHECK-BE-NEXT: movlt r3, #1
|
||||
; CHECK-BE-NEXT: cmp r3, #0
|
||||
; CHECK-BE-NEXT: it ne
|
||||
; CHECK-BE-NEXT: mvnne r2, #1
|
||||
; CHECK-BE-NEXT: bfi r2, r1, #0, #1
|
||||
; CHECK-BE-NEXT: and r1, r2, #3
|
||||
; CHECK-BE-NEXT: lsls r2, r2, #30
|
||||
; CHECK-BE-NEXT: mvnne r3, #1
|
||||
; CHECK-BE-NEXT: bfi r3, r1, #0, #1
|
||||
; CHECK-BE-NEXT: and r1, r3, #3
|
||||
; CHECK-BE-NEXT: lsls r2, r3, #30
|
||||
; CHECK-BE-NEXT: bpl .LBB50_2
|
||||
; CHECK-BE-NEXT: @ %bb.1: @ %cond.load
|
||||
; CHECK-BE-NEXT: vldr d1, .LCPI50_0
|
||||
|
@ -1921,7 +1921,7 @@ define arm_aapcs_vfpcc <2 x double> @masked_v2f64_align4_zero(<2 x double> *%des
|
|||
; CHECK-BE-NEXT: it ne
|
||||
; CHECK-BE-NEXT: vldrne d1, [r0, #8]
|
||||
; CHECK-BE-NEXT: add sp, #4
|
||||
; CHECK-BE-NEXT: bx lr
|
||||
; CHECK-BE-NEXT: pop {r7, pc}
|
||||
; CHECK-BE-NEXT: .p2align 3
|
||||
; CHECK-BE-NEXT: @ %bb.4:
|
||||
; CHECK-BE-NEXT: .LCPI50_0:
|
||||
|
|
|
@ -935,19 +935,19 @@ entry:
|
|||
define arm_aapcs_vfpcc void @masked_v2i64(<2 x i64> *%dest, <2 x i64> %a) {
|
||||
; CHECK-LE-LABEL: masked_v2i64:
|
||||
; CHECK-LE: @ %bb.0: @ %entry
|
||||
; CHECK-LE-NEXT: .save {r7, lr}
|
||||
; CHECK-LE-NEXT: push {r7, lr}
|
||||
; CHECK-LE-NEXT: .pad #4
|
||||
; CHECK-LE-NEXT: sub sp, #4
|
||||
; CHECK-LE-NEXT: vmov r2, s0
|
||||
; CHECK-LE-NEXT: vmov r1, r2, d0
|
||||
; CHECK-LE-NEXT: movs r3, #0
|
||||
; CHECK-LE-NEXT: vmov r1, s1
|
||||
; CHECK-LE-NEXT: vmov r12, s3
|
||||
; CHECK-LE-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-LE-NEXT: vmov r2, s2
|
||||
; CHECK-LE-NEXT: sbcs.w r1, r3, r1
|
||||
; CHECK-LE-NEXT: vmov lr, r12, d1
|
||||
; CHECK-LE-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-LE-NEXT: sbcs.w r1, r3, r2
|
||||
; CHECK-LE-NEXT: mov.w r1, #0
|
||||
; CHECK-LE-NEXT: it lt
|
||||
; CHECK-LE-NEXT: movlt r1, #1
|
||||
; CHECK-LE-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-LE-NEXT: rsbs.w r2, lr, #0
|
||||
; CHECK-LE-NEXT: sbcs.w r2, r3, r12
|
||||
; CHECK-LE-NEXT: it lt
|
||||
; CHECK-LE-NEXT: movlt r3, #1
|
||||
|
@ -963,24 +963,24 @@ define arm_aapcs_vfpcc void @masked_v2i64(<2 x i64> *%dest, <2 x i64> %a) {
|
|||
; CHECK-LE-NEXT: it mi
|
||||
; CHECK-LE-NEXT: vstrmi d1, [r0, #8]
|
||||
; CHECK-LE-NEXT: add sp, #4
|
||||
; CHECK-LE-NEXT: bx lr
|
||||
; CHECK-LE-NEXT: pop {r7, pc}
|
||||
;
|
||||
; CHECK-BE-LABEL: masked_v2i64:
|
||||
; CHECK-BE: @ %bb.0: @ %entry
|
||||
; CHECK-BE-NEXT: .save {r7, lr}
|
||||
; CHECK-BE-NEXT: push {r7, lr}
|
||||
; CHECK-BE-NEXT: .pad #4
|
||||
; CHECK-BE-NEXT: sub sp, #4
|
||||
; CHECK-BE-NEXT: vrev64.32 q1, q0
|
||||
; CHECK-BE-NEXT: movs r3, #0
|
||||
; CHECK-BE-NEXT: vmov r2, s7
|
||||
; CHECK-BE-NEXT: vmov r1, s6
|
||||
; CHECK-BE-NEXT: vmov r12, s4
|
||||
; CHECK-BE-NEXT: vmov r1, r2, d3
|
||||
; CHECK-BE-NEXT: vmov r12, lr, d2
|
||||
; CHECK-BE-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-BE-NEXT: vmov r2, s5
|
||||
; CHECK-BE-NEXT: sbcs.w r1, r3, r1
|
||||
; CHECK-BE-NEXT: mov.w r1, #0
|
||||
; CHECK-BE-NEXT: it lt
|
||||
; CHECK-BE-NEXT: movlt r1, #1
|
||||
; CHECK-BE-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-BE-NEXT: rsbs.w r2, lr, #0
|
||||
; CHECK-BE-NEXT: sbcs.w r2, r3, r12
|
||||
; CHECK-BE-NEXT: it lt
|
||||
; CHECK-BE-NEXT: movlt r3, #1
|
||||
|
@ -996,7 +996,7 @@ define arm_aapcs_vfpcc void @masked_v2i64(<2 x i64> *%dest, <2 x i64> %a) {
|
|||
; CHECK-BE-NEXT: it ne
|
||||
; CHECK-BE-NEXT: vstrne d1, [r0, #8]
|
||||
; CHECK-BE-NEXT: add sp, #4
|
||||
; CHECK-BE-NEXT: bx lr
|
||||
; CHECK-BE-NEXT: pop {r7, pc}
|
||||
entry:
|
||||
%c = icmp sgt <2 x i64> %a, zeroinitializer
|
||||
call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> %a, <2 x i64>* %dest, i32 8, <2 x i1> %c)
|
||||
|
@ -1006,19 +1006,19 @@ entry:
|
|||
define arm_aapcs_vfpcc void @masked_v2f64(<2 x double> *%dest, <2 x double> %a, <2 x i64> %b) {
|
||||
; CHECK-LE-LABEL: masked_v2f64:
|
||||
; CHECK-LE: @ %bb.0: @ %entry
|
||||
; CHECK-LE-NEXT: .save {r7, lr}
|
||||
; CHECK-LE-NEXT: push {r7, lr}
|
||||
; CHECK-LE-NEXT: .pad #4
|
||||
; CHECK-LE-NEXT: sub sp, #4
|
||||
; CHECK-LE-NEXT: vmov r2, s4
|
||||
; CHECK-LE-NEXT: vmov r1, r2, d2
|
||||
; CHECK-LE-NEXT: movs r3, #0
|
||||
; CHECK-LE-NEXT: vmov r1, s5
|
||||
; CHECK-LE-NEXT: vmov r12, s7
|
||||
; CHECK-LE-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-LE-NEXT: vmov r2, s6
|
||||
; CHECK-LE-NEXT: sbcs.w r1, r3, r1
|
||||
; CHECK-LE-NEXT: vmov lr, r12, d3
|
||||
; CHECK-LE-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-LE-NEXT: sbcs.w r1, r3, r2
|
||||
; CHECK-LE-NEXT: mov.w r1, #0
|
||||
; CHECK-LE-NEXT: it lt
|
||||
; CHECK-LE-NEXT: movlt r1, #1
|
||||
; CHECK-LE-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-LE-NEXT: rsbs.w r2, lr, #0
|
||||
; CHECK-LE-NEXT: sbcs.w r2, r3, r12
|
||||
; CHECK-LE-NEXT: it lt
|
||||
; CHECK-LE-NEXT: movlt r3, #1
|
||||
|
@ -1034,24 +1034,24 @@ define arm_aapcs_vfpcc void @masked_v2f64(<2 x double> *%dest, <2 x double> %a,
|
|||
; CHECK-LE-NEXT: it mi
|
||||
; CHECK-LE-NEXT: vstrmi d1, [r0, #8]
|
||||
; CHECK-LE-NEXT: add sp, #4
|
||||
; CHECK-LE-NEXT: bx lr
|
||||
; CHECK-LE-NEXT: pop {r7, pc}
|
||||
;
|
||||
; CHECK-BE-LABEL: masked_v2f64:
|
||||
; CHECK-BE: @ %bb.0: @ %entry
|
||||
; CHECK-BE-NEXT: .save {r7, lr}
|
||||
; CHECK-BE-NEXT: push {r7, lr}
|
||||
; CHECK-BE-NEXT: .pad #4
|
||||
; CHECK-BE-NEXT: sub sp, #4
|
||||
; CHECK-BE-NEXT: vrev64.32 q2, q1
|
||||
; CHECK-BE-NEXT: movs r3, #0
|
||||
; CHECK-BE-NEXT: vmov r2, s11
|
||||
; CHECK-BE-NEXT: vmov r1, s10
|
||||
; CHECK-BE-NEXT: vmov r12, s8
|
||||
; CHECK-BE-NEXT: vmov r1, r2, d5
|
||||
; CHECK-BE-NEXT: vmov r12, lr, d4
|
||||
; CHECK-BE-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-BE-NEXT: vmov r2, s9
|
||||
; CHECK-BE-NEXT: sbcs.w r1, r3, r1
|
||||
; CHECK-BE-NEXT: mov.w r1, #0
|
||||
; CHECK-BE-NEXT: it lt
|
||||
; CHECK-BE-NEXT: movlt r1, #1
|
||||
; CHECK-BE-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-BE-NEXT: rsbs.w r2, lr, #0
|
||||
; CHECK-BE-NEXT: sbcs.w r2, r3, r12
|
||||
; CHECK-BE-NEXT: it lt
|
||||
; CHECK-BE-NEXT: movlt r3, #1
|
||||
|
@ -1067,7 +1067,7 @@ define arm_aapcs_vfpcc void @masked_v2f64(<2 x double> *%dest, <2 x double> %a,
|
|||
; CHECK-BE-NEXT: it ne
|
||||
; CHECK-BE-NEXT: vstrne d1, [r0, #8]
|
||||
; CHECK-BE-NEXT: add sp, #4
|
||||
; CHECK-BE-NEXT: bx lr
|
||||
; CHECK-BE-NEXT: pop {r7, pc}
|
||||
entry:
|
||||
%c = icmp sgt <2 x i64> %b, zeroinitializer
|
||||
call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %a, <2 x double>* %dest, i32 8, <2 x i1> %c)
|
||||
|
|
|
@ -38,36 +38,32 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @smin_v2i64(<2 x i64> %s1, <2 x i64> %s2) {
|
||||
; CHECK-LABEL: smin_v2i64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: movs r0, #0
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: vmov r12, s7
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov lr, s1
|
||||
; CHECK-NEXT: subs r2, r3, r2
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r2, s4
|
||||
; CHECK-NEXT: sbcs.w r1, r1, r12
|
||||
; CHECK-NEXT: vmov r12, s5
|
||||
; CHECK-NEXT: mov.w r1, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r1, #1
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: subs r2, r3, r2
|
||||
; CHECK-NEXT: sbcs.w r2, lr, r12
|
||||
; CHECK-NEXT: .save {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: vmov r0, r1, d3
|
||||
; CHECK-NEXT: movs r6, #0
|
||||
; CHECK-NEXT: vmov r2, r3, d1
|
||||
; CHECK-NEXT: vmov r12, lr, d2
|
||||
; CHECK-NEXT: vmov r4, r5, d0
|
||||
; CHECK-NEXT: subs r0, r2, r0
|
||||
; CHECK-NEXT: sbcs.w r0, r3, r1
|
||||
; CHECK-NEXT: mov.w r0, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r0, #1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r0, r1
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r0, r1
|
||||
; CHECK-NEXT: subs.w r1, r4, r12
|
||||
; CHECK-NEXT: sbcs.w r1, r5, lr
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r6, #1
|
||||
; CHECK-NEXT: cmp r6, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
|
||||
; CHECK-NEXT: vbic q1, q1, q2
|
||||
; CHECK-NEXT: vand q0, q0, q2
|
||||
; CHECK-NEXT: vorr q0, q0, q1
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
; CHECK-NEXT: pop {r4, r5, r6, pc}
|
||||
entry:
|
||||
%0 = icmp slt <2 x i64> %s1, %s2
|
||||
%1 = select <2 x i1> %0, <2 x i64> %s1, <2 x i64> %s2
|
||||
|
@ -110,36 +106,32 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @umin_v2i64(<2 x i64> %s1, <2 x i64> %s2) {
|
||||
; CHECK-LABEL: umin_v2i64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: movs r0, #0
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: vmov r12, s7
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov lr, s1
|
||||
; CHECK-NEXT: subs r2, r3, r2
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r2, s4
|
||||
; CHECK-NEXT: sbcs.w r1, r1, r12
|
||||
; CHECK-NEXT: vmov r12, s5
|
||||
; CHECK-NEXT: mov.w r1, #0
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r1, #1
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: subs r2, r3, r2
|
||||
; CHECK-NEXT: sbcs.w r2, lr, r12
|
||||
; CHECK-NEXT: .save {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: vmov r0, r1, d3
|
||||
; CHECK-NEXT: movs r6, #0
|
||||
; CHECK-NEXT: vmov r2, r3, d1
|
||||
; CHECK-NEXT: vmov r12, lr, d2
|
||||
; CHECK-NEXT: vmov r4, r5, d0
|
||||
; CHECK-NEXT: subs r0, r2, r0
|
||||
; CHECK-NEXT: sbcs.w r0, r3, r1
|
||||
; CHECK-NEXT: mov.w r0, #0
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r0, #1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r0, r1
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r0, r1
|
||||
; CHECK-NEXT: subs.w r1, r4, r12
|
||||
; CHECK-NEXT: sbcs.w r1, r5, lr
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r6, #1
|
||||
; CHECK-NEXT: cmp r6, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
|
||||
; CHECK-NEXT: vbic q1, q1, q2
|
||||
; CHECK-NEXT: vand q0, q0, q2
|
||||
; CHECK-NEXT: vorr q0, q0, q1
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
; CHECK-NEXT: pop {r4, r5, r6, pc}
|
||||
entry:
|
||||
%0 = icmp ult <2 x i64> %s1, %s2
|
||||
%1 = select <2 x i1> %0, <2 x i64> %s1, <2 x i64> %s2
|
||||
|
@ -183,36 +175,32 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @smax_v2i64(<2 x i64> %s1, <2 x i64> %s2) {
|
||||
; CHECK-LABEL: smax_v2i64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: movs r0, #0
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: vmov r12, s3
|
||||
; CHECK-NEXT: vmov r1, s7
|
||||
; CHECK-NEXT: vmov lr, s5
|
||||
; CHECK-NEXT: subs r2, r3, r2
|
||||
; CHECK-NEXT: vmov r3, s4
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: sbcs.w r1, r1, r12
|
||||
; CHECK-NEXT: vmov r12, s1
|
||||
; CHECK-NEXT: mov.w r1, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r1, #1
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: subs r2, r3, r2
|
||||
; CHECK-NEXT: sbcs.w r2, lr, r12
|
||||
; CHECK-NEXT: .save {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: movs r6, #0
|
||||
; CHECK-NEXT: vmov r2, r3, d3
|
||||
; CHECK-NEXT: vmov r12, lr, d0
|
||||
; CHECK-NEXT: vmov r4, r5, d2
|
||||
; CHECK-NEXT: subs r0, r2, r0
|
||||
; CHECK-NEXT: sbcs.w r0, r3, r1
|
||||
; CHECK-NEXT: mov.w r0, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r0, #1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r0, r1
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r0, r1
|
||||
; CHECK-NEXT: subs.w r1, r4, r12
|
||||
; CHECK-NEXT: sbcs.w r1, r5, lr
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r6, #1
|
||||
; CHECK-NEXT: cmp r6, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
|
||||
; CHECK-NEXT: vbic q1, q1, q2
|
||||
; CHECK-NEXT: vand q0, q0, q2
|
||||
; CHECK-NEXT: vorr q0, q0, q1
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
; CHECK-NEXT: pop {r4, r5, r6, pc}
|
||||
entry:
|
||||
%0 = icmp sgt <2 x i64> %s1, %s2
|
||||
%1 = select <2 x i1> %0, <2 x i64> %s1, <2 x i64> %s2
|
||||
|
@ -255,36 +243,32 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @umax_v2i64(<2 x i64> %s1, <2 x i64> %s2) {
|
||||
; CHECK-LABEL: umax_v2i64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: movs r0, #0
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: vmov r12, s3
|
||||
; CHECK-NEXT: vmov r1, s7
|
||||
; CHECK-NEXT: vmov lr, s5
|
||||
; CHECK-NEXT: subs r2, r3, r2
|
||||
; CHECK-NEXT: vmov r3, s4
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: sbcs.w r1, r1, r12
|
||||
; CHECK-NEXT: vmov r12, s1
|
||||
; CHECK-NEXT: mov.w r1, #0
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r1, #1
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: subs r2, r3, r2
|
||||
; CHECK-NEXT: sbcs.w r2, lr, r12
|
||||
; CHECK-NEXT: .save {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: movs r6, #0
|
||||
; CHECK-NEXT: vmov r2, r3, d3
|
||||
; CHECK-NEXT: vmov r12, lr, d0
|
||||
; CHECK-NEXT: vmov r4, r5, d2
|
||||
; CHECK-NEXT: subs r0, r2, r0
|
||||
; CHECK-NEXT: sbcs.w r0, r3, r1
|
||||
; CHECK-NEXT: mov.w r0, #0
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r0, #1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r0, r1
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r0, r1
|
||||
; CHECK-NEXT: subs.w r1, r4, r12
|
||||
; CHECK-NEXT: sbcs.w r1, r5, lr
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r6, #1
|
||||
; CHECK-NEXT: cmp r6, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
|
||||
; CHECK-NEXT: vbic q1, q1, q2
|
||||
; CHECK-NEXT: vand q0, q0, q2
|
||||
; CHECK-NEXT: vorr q0, q0, q1
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
; CHECK-NEXT: pop {r4, r5, r6, pc}
|
||||
entry:
|
||||
%0 = icmp ugt <2 x i64> %s1, %s2
|
||||
%1 = select <2 x i1> %0, <2 x i64> %s1, <2 x i64> %s2
|
||||
|
|
|
@ -34,17 +34,15 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @neg_v2i64(<2 x i64> %s1) {
|
||||
; CHECK-LABEL: neg_v2i64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: sbc.w r0, r12, r0
|
||||
; CHECK-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-NEXT: sbc.w r3, r12, r3
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, r1
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r0
|
||||
; CHECK-NEXT: vmov r3, r2, d0
|
||||
; CHECK-NEXT: rsbs r0, r0, #0
|
||||
; CHECK-NEXT: sbc.w r1, r12, r1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: sbc.w r2, r12, r2
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r3, r0
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r2, r1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = sub nsw <2 x i64> zeroinitializer, %s1
|
||||
|
|
|
@ -102,31 +102,31 @@ entry:
|
|||
define arm_aapcs_vfpcc <4 x float> @vector_add_f32(<4 x float> %lhs, <4 x float> %rhs) {
|
||||
; CHECK-NOFP-LABEL: vector_add_f32:
|
||||
; CHECK-NOFP: @ %bb.0: @ %entry
|
||||
; CHECK-NOFP-NEXT: .save {r7, lr}
|
||||
; CHECK-NOFP-NEXT: push {r7, lr}
|
||||
; CHECK-NOFP-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NOFP-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NOFP-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
|
||||
; CHECK-NOFP-NEXT: vpush {d8, d9, d10, d11, d12, d13}
|
||||
; CHECK-NOFP-NEXT: vmov q4, q1
|
||||
; CHECK-NOFP-NEXT: vmov q5, q1
|
||||
; CHECK-NOFP-NEXT: vmov q6, q0
|
||||
; CHECK-NOFP-NEXT: vmov r0, s27
|
||||
; CHECK-NOFP-NEXT: vmov r1, s19
|
||||
; CHECK-NOFP-NEXT: vmov r4, r0, d13
|
||||
; CHECK-NOFP-NEXT: vmov r5, r1, d11
|
||||
; CHECK-NOFP-NEXT: bl __aeabi_fadd
|
||||
; CHECK-NOFP-NEXT: vmov s23, r0
|
||||
; CHECK-NOFP-NEXT: vmov r0, s26
|
||||
; CHECK-NOFP-NEXT: vmov r1, s18
|
||||
; CHECK-NOFP-NEXT: vmov s19, r0
|
||||
; CHECK-NOFP-NEXT: mov r0, r4
|
||||
; CHECK-NOFP-NEXT: mov r1, r5
|
||||
; CHECK-NOFP-NEXT: bl __aeabi_fadd
|
||||
; CHECK-NOFP-NEXT: vmov s22, r0
|
||||
; CHECK-NOFP-NEXT: vmov r0, s25
|
||||
; CHECK-NOFP-NEXT: vmov r1, s17
|
||||
; CHECK-NOFP-NEXT: vmov s18, r0
|
||||
; CHECK-NOFP-NEXT: vmov r4, r0, d12
|
||||
; CHECK-NOFP-NEXT: vmov r5, r1, d10
|
||||
; CHECK-NOFP-NEXT: bl __aeabi_fadd
|
||||
; CHECK-NOFP-NEXT: vmov s21, r0
|
||||
; CHECK-NOFP-NEXT: vmov r0, s24
|
||||
; CHECK-NOFP-NEXT: vmov r1, s16
|
||||
; CHECK-NOFP-NEXT: vmov s17, r0
|
||||
; CHECK-NOFP-NEXT: mov r0, r4
|
||||
; CHECK-NOFP-NEXT: mov r1, r5
|
||||
; CHECK-NOFP-NEXT: bl __aeabi_fadd
|
||||
; CHECK-NOFP-NEXT: vmov s20, r0
|
||||
; CHECK-NOFP-NEXT: vmov q0, q5
|
||||
; CHECK-NOFP-NEXT: vmov s16, r0
|
||||
; CHECK-NOFP-NEXT: vmov q0, q4
|
||||
; CHECK-NOFP-NEXT: vpop {d8, d9, d10, d11, d12, d13}
|
||||
; CHECK-NOFP-NEXT: pop {r7, pc}
|
||||
; CHECK-NOFP-NEXT: pop {r4, r5, r7, pc}
|
||||
;
|
||||
; CHECK-FP-LABEL: vector_add_f32:
|
||||
; CHECK-FP: @ %bb.0: @ %entry
|
||||
|
|
|
@ -6,54 +6,52 @@
|
|||
define arm_aapcs_vfpcc void @k() {
|
||||
; CHECK-LABEL: k:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
|
||||
; CHECK-NEXT: .pad #16
|
||||
; CHECK-NEXT: sub sp, #16
|
||||
; CHECK-NEXT: adr r5, .LCPI0_0
|
||||
; CHECK-NEXT: adr r4, .LCPI0_1
|
||||
; CHECK-NEXT: vldrw.u32 q5, [r5]
|
||||
; CHECK-NEXT: vldrw.u32 q6, [r4]
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr}
|
||||
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
|
||||
; CHECK-NEXT: .pad #4
|
||||
; CHECK-NEXT: sub sp, #4
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14}
|
||||
; CHECK-NEXT: .pad #24
|
||||
; CHECK-NEXT: sub sp, #24
|
||||
; CHECK-NEXT: adr.w r8, .LCPI0_0
|
||||
; CHECK-NEXT: adr.w r9, .LCPI0_1
|
||||
; CHECK-NEXT: vldrw.u32 q6, [r8]
|
||||
; CHECK-NEXT: vldrw.u32 q5, [r9]
|
||||
; CHECK-NEXT: vmov.i32 q0, #0x1
|
||||
; CHECK-NEXT: vmov.i8 q1, #0x0
|
||||
; CHECK-NEXT: vmov.i8 q2, #0xff
|
||||
; CHECK-NEXT: vmov.i16 q3, #0x6
|
||||
; CHECK-NEXT: vmov.i16 q4, #0x3
|
||||
; CHECK-NEXT: movs r0, #0
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: .LBB0_1: @ %vector.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vand q5, q5, q0
|
||||
; CHECK-NEXT: vand q6, q6, q0
|
||||
; CHECK-NEXT: vand q5, q5, q0
|
||||
; CHECK-NEXT: vcmp.i32 eq, q6, zr
|
||||
; CHECK-NEXT: cmp.w r12, #0
|
||||
; CHECK-NEXT: vpsel q6, q2, q1
|
||||
; CHECK-NEXT: vcmp.i32 eq, q5, zr
|
||||
; CHECK-NEXT: vpsel q5, q2, q1
|
||||
; CHECK-NEXT: vcmp.i32 eq, q6, zr
|
||||
; CHECK-NEXT: vpsel q7, q2, q1
|
||||
; CHECK-NEXT: vmov r1, s28
|
||||
; CHECK-NEXT: vmov.16 q6[0], r1
|
||||
; CHECK-NEXT: vmov r1, s29
|
||||
; CHECK-NEXT: vmov.16 q6[1], r1
|
||||
; CHECK-NEXT: vmov r1, s30
|
||||
; CHECK-NEXT: vmov.16 q6[2], r1
|
||||
; CHECK-NEXT: vmov r1, s31
|
||||
; CHECK-NEXT: vmov.16 q6[3], r1
|
||||
; CHECK-NEXT: vmov r1, s20
|
||||
; CHECK-NEXT: vmov.16 q6[4], r1
|
||||
; CHECK-NEXT: vmov r1, s21
|
||||
; CHECK-NEXT: vmov.16 q6[5], r1
|
||||
; CHECK-NEXT: vmov r1, s22
|
||||
; CHECK-NEXT: vmov.16 q6[6], r1
|
||||
; CHECK-NEXT: vmov r1, s23
|
||||
; CHECK-NEXT: vmov.16 q6[7], r1
|
||||
; CHECK-NEXT: vcmp.i16 ne, q6, zr
|
||||
; CHECK-NEXT: vmov.i32 q6, #0x0
|
||||
; CHECK-NEXT: vpsel q5, q4, q3
|
||||
; CHECK-NEXT: vstrh.16 q5, [r0]
|
||||
; CHECK-NEXT: vmov q5, q6
|
||||
; CHECK-NEXT: cbz r0, .LBB0_2
|
||||
; CHECK-NEXT: le .LBB0_1
|
||||
; CHECK-NEXT: .LBB0_2: @ %for.cond4.preheader
|
||||
; CHECK-NEXT: vmov r4, r0, d12
|
||||
; CHECK-NEXT: vmov r3, r6, d10
|
||||
; CHECK-NEXT: vmov r1, r2, d11
|
||||
; CHECK-NEXT: vmov.16 q5[0], r3
|
||||
; CHECK-NEXT: vmov.16 q5[1], r6
|
||||
; CHECK-NEXT: vmov r5, r7, d13
|
||||
; CHECK-NEXT: vmov.16 q5[2], r1
|
||||
; CHECK-NEXT: vmov.16 q5[3], r2
|
||||
; CHECK-NEXT: vmov.16 q5[4], r4
|
||||
; CHECK-NEXT: vmov.16 q5[5], r0
|
||||
; CHECK-NEXT: vmov.16 q5[6], r5
|
||||
; CHECK-NEXT: vmov.16 q5[7], r7
|
||||
; CHECK-NEXT: vcmp.i16 ne, q5, zr
|
||||
; CHECK-NEXT: vmov.i32 q5, #0x0
|
||||
; CHECK-NEXT: vpsel q6, q4, q3
|
||||
; CHECK-NEXT: vstrh.16 q6, [r0]
|
||||
; CHECK-NEXT: vmov q6, q5
|
||||
; CHECK-NEXT: bne .LBB0_1
|
||||
; CHECK-NEXT: @ %bb.2: @ %for.cond4.preheader
|
||||
; CHECK-NEXT: movs r6, #0
|
||||
; CHECK-NEXT: cbnz r6, .LBB0_5
|
||||
; CHECK-NEXT: .LBB0_3: @ %for.body10
|
||||
|
@ -63,8 +61,8 @@ define arm_aapcs_vfpcc void @k() {
|
|||
; CHECK-NEXT: .LBB0_4: @ %for.cond4.loopexit
|
||||
; CHECK-NEXT: bl l
|
||||
; CHECK-NEXT: .LBB0_5: @ %vector.body105.preheader
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r5]
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r4]
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r8]
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r9]
|
||||
; CHECK-NEXT: vmov.i32 q2, #0x8
|
||||
; CHECK-NEXT: .LBB0_6: @ %vector.body105
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
|
@ -73,7 +71,7 @@ define arm_aapcs_vfpcc void @k() {
|
|||
; CHECK-NEXT: cbz r6, .LBB0_7
|
||||
; CHECK-NEXT: le .LBB0_6
|
||||
; CHECK-NEXT: .LBB0_7: @ %vector.body115.ph
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r4]
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r9]
|
||||
; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill
|
||||
; CHECK-NEXT: @APP
|
||||
; CHECK-NEXT: nop
|
||||
|
|
|
@ -575,11 +575,9 @@ define arm_aapcs_vfpcc <2 x i64> @cmpeqz_v2i1(<2 x i64> %a, <2 x i64> %b) {
|
|||
; CHECK-LABEL: cmpeqz_v2i1:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vorr q2, q0, q1
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: vmov r1, s10
|
||||
; CHECK-NEXT: vmov r2, s8
|
||||
; CHECK-NEXT: vmov r0, r1, d5
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s9
|
||||
; CHECK-NEXT: vmov r1, r2, d4
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
|
@ -604,33 +602,27 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @cmpeq_v2i1(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
|
||||
; CHECK-LABEL: cmpeq_v2i1:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: vmov r1, s7
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: vmov r3, s4
|
||||
; CHECK-NEXT: eors r0, r1
|
||||
; CHECK-NEXT: vmov r1, s10
|
||||
; CHECK-NEXT: eors r1, r2
|
||||
; CHECK-NEXT: vmov r2, s5
|
||||
; CHECK-NEXT: vmov r0, r1, d5
|
||||
; CHECK-NEXT: vmov r2, r3, d3
|
||||
; CHECK-NEXT: eors r0, r2
|
||||
; CHECK-NEXT: eors r1, r3
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s9
|
||||
; CHECK-NEXT: vmov r12, r2, d4
|
||||
; CHECK-NEXT: vmov r3, r1, d2
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: eors r1, r2
|
||||
; CHECK-NEXT: vmov r2, s8
|
||||
; CHECK-NEXT: eors r2, r3
|
||||
; CHECK-NEXT: eor.w r2, r3, r12
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: cset r1, eq
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r1, r2, d0
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
|
@ -656,29 +648,25 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @cmpeqr_v2i1(<2 x i64> %a, <2 x i64> %b, i64 %c) {
|
||||
; CHECK-LABEL: cmpeqr_v2i1:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r2, s7
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: eors r2, r1
|
||||
; CHECK-NEXT: eors r3, r0
|
||||
; CHECK-NEXT: vmov r2, r3, d3
|
||||
; CHECK-NEXT: eors r3, r1
|
||||
; CHECK-NEXT: eors r2, r0
|
||||
; CHECK-NEXT: orrs r2, r3
|
||||
; CHECK-NEXT: vmov r3, s5
|
||||
; CHECK-NEXT: vmov r12, r3, d2
|
||||
; CHECK-NEXT: cset r2, eq
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: csetm r2, ne
|
||||
; CHECK-NEXT: eors r1, r3
|
||||
; CHECK-NEXT: vmov r3, s4
|
||||
; CHECK-NEXT: eors r0, r3
|
||||
; CHECK-NEXT: eor.w r0, r0, r12
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r0, r2
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r0, r2
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r1, r2, d0
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
|
|
|
@ -389,11 +389,9 @@ define arm_aapcs_vfpcc i2 @bitcast_from_v2i1(<2 x i64> %a) {
|
|||
; CHECK-LE: @ %bb.0: @ %entry
|
||||
; CHECK-LE-NEXT: .pad #4
|
||||
; CHECK-LE-NEXT: sub sp, #4
|
||||
; CHECK-LE-NEXT: vmov r0, s1
|
||||
; CHECK-LE-NEXT: vmov r1, s0
|
||||
; CHECK-LE-NEXT: vmov r2, s2
|
||||
; CHECK-LE-NEXT: vmov r0, r1, d0
|
||||
; CHECK-LE-NEXT: orrs r0, r1
|
||||
; CHECK-LE-NEXT: vmov r1, s3
|
||||
; CHECK-LE-NEXT: vmov r1, r2, d1
|
||||
; CHECK-LE-NEXT: cset r0, eq
|
||||
; CHECK-LE-NEXT: orrs r1, r2
|
||||
; CHECK-LE-NEXT: cset r1, eq
|
||||
|
@ -410,11 +408,9 @@ define arm_aapcs_vfpcc i2 @bitcast_from_v2i1(<2 x i64> %a) {
|
|||
; CHECK-BE-NEXT: .pad #4
|
||||
; CHECK-BE-NEXT: sub sp, #4
|
||||
; CHECK-BE-NEXT: vrev64.32 q1, q0
|
||||
; CHECK-BE-NEXT: vmov r0, s6
|
||||
; CHECK-BE-NEXT: vmov r1, s7
|
||||
; CHECK-BE-NEXT: vmov r2, s5
|
||||
; CHECK-BE-NEXT: vmov r0, r1, d3
|
||||
; CHECK-BE-NEXT: orrs r0, r1
|
||||
; CHECK-BE-NEXT: vmov r1, s4
|
||||
; CHECK-BE-NEXT: vmov r1, r2, d2
|
||||
; CHECK-BE-NEXT: cset r0, eq
|
||||
; CHECK-BE-NEXT: orrs r1, r2
|
||||
; CHECK-BE-NEXT: cset r1, eq
|
||||
|
|
|
@ -46,23 +46,21 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @sext_v2i1_v2i64(<2 x i64> %src) {
|
||||
; CHECK-LABEL: sext_v2i1_v2i64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: movs r2, #0
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: sbcs.w r0, r2, r0
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: movs r3, #0
|
||||
; CHECK-NEXT: vmov r2, r12, d0
|
||||
; CHECK-NEXT: rsbs r0, r0, #0
|
||||
; CHECK-NEXT: sbcs.w r0, r3, r1
|
||||
; CHECK-NEXT: mov.w r0, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r0, #1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: sbcs.w r1, r2, r1
|
||||
; CHECK-NEXT: rsbs r1, r2, #0
|
||||
; CHECK-NEXT: sbcs.w r1, r3, r12
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r2, #1
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: movlt r3, #1
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
|
||||
|
@ -119,29 +117,29 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @zext_v2i1_v2i64(<2 x i64> %src) {
|
||||
; CHECK-LABEL: zext_v2i1_v2i64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: adr r1, .LCPI7_0
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r1]
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: movs r0, #0
|
||||
; CHECK-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: sbcs.w r1, r0, r1
|
||||
; CHECK-NEXT: mov.w r1, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r1, #1
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: sbcs.w r2, r0, r2
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: movs r3, #0
|
||||
; CHECK-NEXT: vmov lr, r12, d0
|
||||
; CHECK-NEXT: adr r2, .LCPI7_0
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r2]
|
||||
; CHECK-NEXT: rsbs r0, r0, #0
|
||||
; CHECK-NEXT: sbcs.w r0, r3, r1
|
||||
; CHECK-NEXT: mov.w r0, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r0, #1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
|
||||
; CHECK-NEXT: vand q0, q0, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: rsbs.w r1, lr, #0
|
||||
; CHECK-NEXT: sbcs.w r1, r3, r12
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r3, #1
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
|
||||
; CHECK-NEXT: vand q0, q1, q0
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
; CHECK-NEXT: .p2align 4
|
||||
; CHECK-NEXT: @ %bb.1:
|
||||
; CHECK-NEXT: .LCPI7_0:
|
||||
|
|
|
@ -318,11 +318,9 @@ entry:
|
|||
define arm_aapcs_vfpcc void @store_v2i1(<2 x i1> *%dst, <2 x i64> %a) {
|
||||
; CHECK-LE-LABEL: store_v2i1:
|
||||
; CHECK-LE: @ %bb.0: @ %entry
|
||||
; CHECK-LE-NEXT: vmov r1, s1
|
||||
; CHECK-LE-NEXT: vmov r2, s0
|
||||
; CHECK-LE-NEXT: vmov r3, s2
|
||||
; CHECK-LE-NEXT: vmov r1, r2, d0
|
||||
; CHECK-LE-NEXT: orrs r1, r2
|
||||
; CHECK-LE-NEXT: vmov r2, s3
|
||||
; CHECK-LE-NEXT: vmov r2, r3, d1
|
||||
; CHECK-LE-NEXT: cset r1, eq
|
||||
; CHECK-LE-NEXT: orrs r2, r3
|
||||
; CHECK-LE-NEXT: cset r2, eq
|
||||
|
@ -337,11 +335,9 @@ define arm_aapcs_vfpcc void @store_v2i1(<2 x i1> *%dst, <2 x i64> %a) {
|
|||
; CHECK-BE-LABEL: store_v2i1:
|
||||
; CHECK-BE: @ %bb.0: @ %entry
|
||||
; CHECK-BE-NEXT: vrev64.32 q1, q0
|
||||
; CHECK-BE-NEXT: vmov r1, s6
|
||||
; CHECK-BE-NEXT: vmov r2, s7
|
||||
; CHECK-BE-NEXT: vmov r3, s5
|
||||
; CHECK-BE-NEXT: vmov r1, r2, d3
|
||||
; CHECK-BE-NEXT: orrs r1, r2
|
||||
; CHECK-BE-NEXT: vmov r2, s4
|
||||
; CHECK-BE-NEXT: vmov r2, r3, d2
|
||||
; CHECK-BE-NEXT: cset r1, eq
|
||||
; CHECK-BE-NEXT: orrs r2, r3
|
||||
; CHECK-BE-NEXT: cset r2, eq
|
||||
|
|
|
@ -323,11 +323,9 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @cmpeqz_v2i1(<2 x i64> %a, <2 x i64> %b) {
|
||||
; CHECK-LABEL: cmpeqz_v2i1:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r1, r2, d0
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
|
@ -351,11 +349,9 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @cmpeq_v2i1(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
|
||||
; CHECK-LABEL: cmpeq_v2i1:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r1, r2, d0
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
|
|
|
@ -377,25 +377,21 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @cmpeqz_v2i1(<2 x i64> %a, <2 x i64> %b) {
|
||||
; CHECK-LABEL: cmpeqz_v2i1:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov r1, s6
|
||||
; CHECK-NEXT: vmov r2, s4
|
||||
; CHECK-NEXT: vmov r0, r1, d3
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: vmov r1, r2, d2
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: cset r1, eq
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r1, r2, d0
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
|
@ -421,33 +417,27 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @cmpeq_v2i1(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
|
||||
; CHECK-LABEL: cmpeq_v2i1:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: vmov r1, s7
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: vmov r3, s4
|
||||
; CHECK-NEXT: eors r0, r1
|
||||
; CHECK-NEXT: vmov r1, s10
|
||||
; CHECK-NEXT: eors r1, r2
|
||||
; CHECK-NEXT: vmov r2, s5
|
||||
; CHECK-NEXT: vmov r0, r1, d5
|
||||
; CHECK-NEXT: vmov r2, r3, d3
|
||||
; CHECK-NEXT: eors r0, r2
|
||||
; CHECK-NEXT: eors r1, r3
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s9
|
||||
; CHECK-NEXT: vmov r12, r2, d4
|
||||
; CHECK-NEXT: vmov r3, r1, d2
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: eors r1, r2
|
||||
; CHECK-NEXT: vmov r2, s8
|
||||
; CHECK-NEXT: eors r2, r3
|
||||
; CHECK-NEXT: eor.w r2, r3, r12
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: cset r1, eq
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r1, r2, d0
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
|
|
|
@ -459,27 +459,23 @@ define <8 x i16> @shuffle6_v4i32(<4 x i32> %src1, <4 x i32> %src2, <8 x i16> %a,
|
|||
; CHECK-NEXT: vmov.i8 q2, #0xff
|
||||
; CHECK-NEXT: vcmp.i32 eq, q0, zr
|
||||
; CHECK-NEXT: vpsel q3, q2, q1
|
||||
; CHECK-NEXT: vmov r0, s12
|
||||
; CHECK-NEXT: vmov r0, r1, d6
|
||||
; CHECK-NEXT: vmov.16 q0[0], r0
|
||||
; CHECK-NEXT: vmov r0, s13
|
||||
; CHECK-NEXT: vmov.16 q0[1], r0
|
||||
; CHECK-NEXT: vmov r0, s14
|
||||
; CHECK-NEXT: vmov.16 q0[1], r1
|
||||
; CHECK-NEXT: vmov r0, r1, d7
|
||||
; CHECK-NEXT: vmov.16 q0[2], r0
|
||||
; CHECK-NEXT: vmov r0, s15
|
||||
; CHECK-NEXT: vmov.16 q0[3], r0
|
||||
; CHECK-NEXT: mov r0, sp
|
||||
; CHECK-NEXT: vldrw.u32 q3, [r0]
|
||||
; CHECK-NEXT: vmov.16 q0[3], r1
|
||||
; CHECK-NEXT: vcmp.i32 eq, q3, zr
|
||||
; CHECK-NEXT: vpsel q1, q2, q1
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r0, r1, d2
|
||||
; CHECK-NEXT: vmov.16 q0[4], r0
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov.16 q0[5], r0
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov.16 q0[5], r1
|
||||
; CHECK-NEXT: vmov r0, r1, d3
|
||||
; CHECK-NEXT: vmov.16 q0[6], r0
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov.16 q0[7], r0
|
||||
; CHECK-NEXT: add r0, sp, #32
|
||||
; CHECK-NEXT: vmov.16 q0[7], r1
|
||||
; CHECK-NEXT: vcmp.i16 ne, q0, zr
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: add r0, sp, #16
|
||||
|
|
|
@ -72,40 +72,34 @@ define arm_aapcs_vfpcc <2 x i64> @cmpeqz_v2i1(<2 x i64> %a, <2 x i64> %b, <2 x i
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: vmov r1, s10
|
||||
; CHECK-NEXT: vmov r2, s8
|
||||
; CHECK-NEXT: vmov r0, r1, d5
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s9
|
||||
; CHECK-NEXT: vmov r1, r2, d4
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: vmov r2, s4
|
||||
; CHECK-NEXT: cset r1, eq
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov r1, s6
|
||||
; CHECK-NEXT: vmov r0, r1, d3
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: vmov r1, r2, d2
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: cset r1, eq
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r1, r0
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r1, r0
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: vbic q3, q3, q2
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r1, r2, d0
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
|
@ -202,40 +196,34 @@ define arm_aapcs_vfpcc <2 x i64> @cmpnez_v2i1(<2 x i64> %a, <2 x i64> %b, <2 x i
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: vmov r1, s10
|
||||
; CHECK-NEXT: vmov r2, s8
|
||||
; CHECK-NEXT: vmov r0, r1, d5
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s9
|
||||
; CHECK-NEXT: vmov r1, r2, d4
|
||||
; CHECK-NEXT: cset r0, ne
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: vmov r2, s4
|
||||
; CHECK-NEXT: cset r1, ne
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov r1, s6
|
||||
; CHECK-NEXT: vmov r0, r1, d3
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: vmov r1, r2, d2
|
||||
; CHECK-NEXT: cset r0, ne
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: cset r1, ne
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r1, r0
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r1, r0
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: vbic q3, q3, q2
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r1, r2, d0
|
||||
; CHECK-NEXT: cset r0, ne
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
|
@ -441,25 +429,21 @@ define arm_aapcs_vfpcc <2 x i64> @cmpeqz_v2i1_i1(<2 x i64> %a, <2 x i64> %b, i64
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, lr}
|
||||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: vmov r2, s7
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: vmov r2, r3, d3
|
||||
; CHECK-NEXT: orrs r2, r3
|
||||
; CHECK-NEXT: vmov r3, s4
|
||||
; CHECK-NEXT: cset r2, eq
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: vmov r2, s5
|
||||
; CHECK-NEXT: vmov r2, r3, d2
|
||||
; CHECK-NEXT: csetm r12, ne
|
||||
; CHECK-NEXT: orrs r2, r3
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: cset r2, eq
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: vmov r2, r3, d1
|
||||
; CHECK-NEXT: csetm r4, ne
|
||||
; CHECK-NEXT: orrs r2, r3
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: cset r2, eq
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: vmov r2, r3, d0
|
||||
; CHECK-NEXT: csetm lr, ne
|
||||
; CHECK-NEXT: orrs r2, r3
|
||||
; CHECK-NEXT: cset r2, eq
|
||||
|
|
|
@ -457,25 +457,21 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @cmpeqz_v2i1(<2 x i64> %a, <2 x i64> %b) {
|
||||
; CHECK-LABEL: cmpeqz_v2i1:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov r1, s6
|
||||
; CHECK-NEXT: vmov r2, s4
|
||||
; CHECK-NEXT: vmov r0, r1, d3
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: vmov r1, r2, d2
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: cset r1, eq
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r1, r2, d0
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
|
@ -501,33 +497,27 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @cmpeq_v2i1(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
|
||||
; CHECK-LABEL: cmpeq_v2i1:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: vmov r1, s7
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: vmov r3, s4
|
||||
; CHECK-NEXT: eors r0, r1
|
||||
; CHECK-NEXT: vmov r1, s10
|
||||
; CHECK-NEXT: eors r1, r2
|
||||
; CHECK-NEXT: vmov r2, s5
|
||||
; CHECK-NEXT: vmov r0, r1, d5
|
||||
; CHECK-NEXT: vmov r2, r3, d3
|
||||
; CHECK-NEXT: eors r0, r2
|
||||
; CHECK-NEXT: eors r1, r3
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s9
|
||||
; CHECK-NEXT: vmov r12, r2, d4
|
||||
; CHECK-NEXT: vmov r3, r1, d2
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: eors r1, r2
|
||||
; CHECK-NEXT: vmov r2, s8
|
||||
; CHECK-NEXT: eors r2, r3
|
||||
; CHECK-NEXT: eor.w r2, r3, r12
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: cset r1, eq
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r1, r2, d0
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
|
|
|
@ -68,20 +68,18 @@ define arm_aapcs_vfpcc void @ssatmul_s_q31(i32* nocapture readonly %pSrcA, i32*
|
|||
; CHECK-NEXT: vbic q3, q0, q2
|
||||
; CHECK-NEXT: vand q2, q4, q2
|
||||
; CHECK-NEXT: vorr q2, q2, q3
|
||||
; CHECK-NEXT: vmov r4, s8
|
||||
; CHECK-NEXT: vmov r3, s9
|
||||
; CHECK-NEXT: vmov r5, s10
|
||||
; CHECK-NEXT: subs r4, r4, r6
|
||||
; CHECK-NEXT: vmov r4, s11
|
||||
; CHECK-NEXT: sbcs r3, r3, #0
|
||||
; CHECK-NEXT: vmov r3, r4, d4
|
||||
; CHECK-NEXT: subs r3, r3, r6
|
||||
; CHECK-NEXT: sbcs r3, r4, #0
|
||||
; CHECK-NEXT: vmov r4, r5, d5
|
||||
; CHECK-NEXT: mov.w r3, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r3, #1
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: csetm r3, ne
|
||||
; CHECK-NEXT: subs r5, r5, r6
|
||||
; CHECK-NEXT: vmov.32 q3[1], r3
|
||||
; CHECK-NEXT: sbcs r4, r4, #0
|
||||
; CHECK-NEXT: subs r4, r4, r6
|
||||
; CHECK-NEXT: sbcs r4, r5, #0
|
||||
; CHECK-NEXT: mov.w r4, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r4, #1
|
||||
|
@ -260,19 +258,17 @@ define arm_aapcs_vfpcc void @ssatmul_4_q31(i32* nocapture readonly %pSrcA, i32*
|
|||
; CHECK-NEXT: vmov.f32 s22, s15
|
||||
; CHECK-NEXT: vmullb.s32 q6, q5, q4
|
||||
; CHECK-NEXT: vmov.f32 s14, s13
|
||||
; CHECK-NEXT: vmov r7, s27
|
||||
; CHECK-NEXT: vmov r4, s26
|
||||
; CHECK-NEXT: vmov r4, r7, d13
|
||||
; CHECK-NEXT: asrl r4, r7, #31
|
||||
; CHECK-NEXT: vmov r10, s24
|
||||
; CHECK-NEXT: rsbs.w r5, r4, #-2147483648
|
||||
; CHECK-NEXT: vmov.f32 s10, s9
|
||||
; CHECK-NEXT: sbcs.w r5, r2, r7
|
||||
; CHECK-NEXT: rsbs.w r5, r4, #-2147483648
|
||||
; CHECK-NEXT: vmov r6, s12
|
||||
; CHECK-NEXT: sbcs.w r5, r2, r7
|
||||
; CHECK-NEXT: mov.w r5, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r5, #1
|
||||
; CHECK-NEXT: cmp r5, #0
|
||||
; CHECK-NEXT: vmov r5, s25
|
||||
; CHECK-NEXT: vmov r10, r5, d12
|
||||
; CHECK-NEXT: csetm r8, ne
|
||||
; CHECK-NEXT: asrl r10, r5, #31
|
||||
; CHECK-NEXT: rsbs.w r3, r10, #-2147483648
|
||||
|
@ -290,21 +286,19 @@ define arm_aapcs_vfpcc void @ssatmul_4_q31(i32* nocapture readonly %pSrcA, i32*
|
|||
; CHECK-NEXT: vbic q5, q0, q4
|
||||
; CHECK-NEXT: vand q4, q6, q4
|
||||
; CHECK-NEXT: vorr q4, q4, q5
|
||||
; CHECK-NEXT: vmov r4, s16
|
||||
; CHECK-NEXT: vmov r3, s17
|
||||
; CHECK-NEXT: vmov r5, s18
|
||||
; CHECK-NEXT: subs.w r4, r4, r8
|
||||
; CHECK-NEXT: vmov r4, s19
|
||||
; CHECK-NEXT: sbcs r3, r3, #0
|
||||
; CHECK-NEXT: vmov r3, r4, d8
|
||||
; CHECK-NEXT: subs.w r3, r3, r8
|
||||
; CHECK-NEXT: sbcs r3, r4, #0
|
||||
; CHECK-NEXT: vmov r4, r5, d9
|
||||
; CHECK-NEXT: mov.w r3, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r3, #1
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: csetm r3, ne
|
||||
; CHECK-NEXT: subs.w r5, r5, r8
|
||||
; CHECK-NEXT: vmov.32 q5[1], r3
|
||||
; CHECK-NEXT: subs.w r4, r4, r8
|
||||
; CHECK-NEXT: sbcs r4, r5, #0
|
||||
; CHECK-NEXT: vmov r5, s8
|
||||
; CHECK-NEXT: sbcs r4, r4, #0
|
||||
; CHECK-NEXT: mov.w r4, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r4, #1
|
||||
|
@ -313,10 +307,10 @@ define arm_aapcs_vfpcc void @ssatmul_4_q31(i32* nocapture readonly %pSrcA, i32*
|
|||
; CHECK-NEXT: vmov q5[2], q5[0], r3, r4
|
||||
; CHECK-NEXT: vmov r3, s10
|
||||
; CHECK-NEXT: vmov r4, s14
|
||||
; CHECK-NEXT: smull r6, r5, r6, r5
|
||||
; CHECK-NEXT: vbic q6, q1, q5
|
||||
; CHECK-NEXT: vand q4, q4, q5
|
||||
; CHECK-NEXT: vorr q4, q4, q6
|
||||
; CHECK-NEXT: smull r6, r5, r6, r5
|
||||
; CHECK-NEXT: asrl r6, r5, #31
|
||||
; CHECK-NEXT: smull r4, r7, r4, r3
|
||||
; CHECK-NEXT: asrl r4, r7, #31
|
||||
|
@ -342,20 +336,18 @@ define arm_aapcs_vfpcc void @ssatmul_4_q31(i32* nocapture readonly %pSrcA, i32*
|
|||
; CHECK-NEXT: vbic q3, q0, q2
|
||||
; CHECK-NEXT: vand q2, q5, q2
|
||||
; CHECK-NEXT: vorr q2, q2, q3
|
||||
; CHECK-NEXT: vmov r3, s8
|
||||
; CHECK-NEXT: vmov r4, s9
|
||||
; CHECK-NEXT: subs.w r3, r3, r8
|
||||
; CHECK-NEXT: sbcs r3, r4, #0
|
||||
; CHECK-NEXT: vmov r4, s10
|
||||
; CHECK-NEXT: vmov r4, r3, d4
|
||||
; CHECK-NEXT: subs.w r4, r4, r8
|
||||
; CHECK-NEXT: sbcs r3, r3, #0
|
||||
; CHECK-NEXT: mov.w r3, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r3, #1
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: vmov r3, s11
|
||||
; CHECK-NEXT: vmov r3, r4, d5
|
||||
; CHECK-NEXT: csetm r5, ne
|
||||
; CHECK-NEXT: vmov.32 q3[1], r5
|
||||
; CHECK-NEXT: subs.w r4, r4, r8
|
||||
; CHECK-NEXT: sbcs r3, r3, #0
|
||||
; CHECK-NEXT: subs.w r3, r3, r8
|
||||
; CHECK-NEXT: sbcs r3, r4, #0
|
||||
; CHECK-NEXT: mov.w r3, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r3, #1
|
||||
|
@ -538,18 +530,16 @@ define arm_aapcs_vfpcc void @ssatmul_4t_q31(i32* nocapture readonly %pSrcA, i32*
|
|||
; CHECK-NEXT: vmov.f32 s30, s23
|
||||
; CHECK-NEXT: vmullb.s32 q0, q7, q6
|
||||
; CHECK-NEXT: vmov.f32 s18, s17
|
||||
; CHECK-NEXT: vmov r5, s3
|
||||
; CHECK-NEXT: vmov r6, s2
|
||||
; CHECK-NEXT: vmov r6, r5, d1
|
||||
; CHECK-NEXT: asrl r6, r5, #31
|
||||
; CHECK-NEXT: vmov r4, s0
|
||||
; CHECK-NEXT: rsbs.w r7, r6, #-2147483648
|
||||
; CHECK-NEXT: vmov.f32 s22, s21
|
||||
; CHECK-NEXT: rsbs.w r7, r6, #-2147483648
|
||||
; CHECK-NEXT: sbcs.w r7, r12, r5
|
||||
; CHECK-NEXT: mov.w r7, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r7, #1
|
||||
; CHECK-NEXT: cmp r7, #0
|
||||
; CHECK-NEXT: vmov r7, s1
|
||||
; CHECK-NEXT: vmov r4, r7, d0
|
||||
; CHECK-NEXT: csetm r10, ne
|
||||
; CHECK-NEXT: asrl r4, r7, #31
|
||||
; CHECK-NEXT: rsbs.w r3, r4, #-2147483648
|
||||
|
@ -567,20 +557,18 @@ define arm_aapcs_vfpcc void @ssatmul_4t_q31(i32* nocapture readonly %pSrcA, i32*
|
|||
; CHECK-NEXT: vbic q6, q2, q0
|
||||
; CHECK-NEXT: vand q0, q7, q0
|
||||
; CHECK-NEXT: vorr q6, q0, q6
|
||||
; CHECK-NEXT: vmov r4, s24
|
||||
; CHECK-NEXT: vmov r3, s25
|
||||
; CHECK-NEXT: vmov r5, s26
|
||||
; CHECK-NEXT: subs.w r4, r4, r8
|
||||
; CHECK-NEXT: vmov r4, s27
|
||||
; CHECK-NEXT: sbcs r3, r3, #0
|
||||
; CHECK-NEXT: vmov r3, r4, d12
|
||||
; CHECK-NEXT: subs.w r3, r3, r8
|
||||
; CHECK-NEXT: sbcs r3, r4, #0
|
||||
; CHECK-NEXT: vmov r4, r5, d13
|
||||
; CHECK-NEXT: mov.w r3, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r3, #1
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: csetm r3, ne
|
||||
; CHECK-NEXT: subs.w r5, r5, r8
|
||||
; CHECK-NEXT: vmov.32 q0[1], r3
|
||||
; CHECK-NEXT: sbcs r4, r4, #0
|
||||
; CHECK-NEXT: subs.w r4, r4, r8
|
||||
; CHECK-NEXT: sbcs r4, r5, #0
|
||||
; CHECK-NEXT: mov.w r4, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r4, #1
|
||||
|
@ -618,20 +606,18 @@ define arm_aapcs_vfpcc void @ssatmul_4t_q31(i32* nocapture readonly %pSrcA, i32*
|
|||
; CHECK-NEXT: vbic q4, q2, q0
|
||||
; CHECK-NEXT: vand q0, q5, q0
|
||||
; CHECK-NEXT: vorr q4, q0, q4
|
||||
; CHECK-NEXT: vmov r4, s16
|
||||
; CHECK-NEXT: vmov r3, s17
|
||||
; CHECK-NEXT: vmov r5, s18
|
||||
; CHECK-NEXT: subs.w r4, r4, r8
|
||||
; CHECK-NEXT: vmov r4, s19
|
||||
; CHECK-NEXT: sbcs r3, r3, #0
|
||||
; CHECK-NEXT: vmov r3, r4, d8
|
||||
; CHECK-NEXT: subs.w r3, r3, r8
|
||||
; CHECK-NEXT: sbcs r3, r4, #0
|
||||
; CHECK-NEXT: vmov r4, r5, d9
|
||||
; CHECK-NEXT: mov.w r3, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r3, #1
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: csetm r3, ne
|
||||
; CHECK-NEXT: subs.w r5, r5, r8
|
||||
; CHECK-NEXT: vmov.32 q0[1], r3
|
||||
; CHECK-NEXT: sbcs r4, r4, #0
|
||||
; CHECK-NEXT: subs.w r4, r4, r8
|
||||
; CHECK-NEXT: sbcs r4, r5, #0
|
||||
; CHECK-NEXT: mov.w r4, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r4, #1
|
||||
|
@ -901,14 +887,12 @@ define arm_aapcs_vfpcc void @usatmul_4_q31(i32* nocapture readonly %pSrcA, i32*
|
|||
; CHECK-NEXT: vmov.f32 s14, s11
|
||||
; CHECK-NEXT: vmullb.u32 q4, q3, q1
|
||||
; CHECK-NEXT: vmov.f32 s2, s1
|
||||
; CHECK-NEXT: vmov r5, s17
|
||||
; CHECK-NEXT: vmov r4, s16
|
||||
; CHECK-NEXT: vmov r4, r5, d8
|
||||
; CHECK-NEXT: lsrl r4, r5, #31
|
||||
; CHECK-NEXT: vmov r7, s19
|
||||
; CHECK-NEXT: subs.w r6, r4, #-1
|
||||
; CHECK-NEXT: vmov.f32 s10, s9
|
||||
; CHECK-NEXT: subs.w r6, r4, #-1
|
||||
; CHECK-NEXT: sbcs r5, r5, #0
|
||||
; CHECK-NEXT: vmov r6, s18
|
||||
; CHECK-NEXT: vmov r6, r7, d9
|
||||
; CHECK-NEXT: mov.w r5, #0
|
||||
; CHECK-NEXT: lsrl r6, r7, #31
|
||||
; CHECK-NEXT: it lo
|
||||
|
@ -928,13 +912,11 @@ define arm_aapcs_vfpcc void @usatmul_4_q31(i32* nocapture readonly %pSrcA, i32*
|
|||
; CHECK-NEXT: vand q3, q3, q1
|
||||
; CHECK-NEXT: vorn q1, q3, q1
|
||||
; CHECK-NEXT: vmullb.u32 q3, q2, q0
|
||||
; CHECK-NEXT: vmov r5, s13
|
||||
; CHECK-NEXT: vmov r4, s12
|
||||
; CHECK-NEXT: vmov r4, r5, d6
|
||||
; CHECK-NEXT: lsrl r4, r5, #31
|
||||
; CHECK-NEXT: vmov r7, s15
|
||||
; CHECK-NEXT: subs.w r6, r4, #-1
|
||||
; CHECK-NEXT: sbcs r5, r5, #0
|
||||
; CHECK-NEXT: vmov r6, s14
|
||||
; CHECK-NEXT: vmov r6, r7, d7
|
||||
; CHECK-NEXT: mov.w r5, #0
|
||||
; CHECK-NEXT: lsrl r6, r7, #31
|
||||
; CHECK-NEXT: it lo
|
||||
|
@ -1566,23 +1548,19 @@ define arm_aapcs_vfpcc void @ssatmul_8t_q15(i16* nocapture readonly %pSrcA, i16*
|
|||
; CHECK-NEXT: vcmp.u32 cs, q1, q5
|
||||
; CHECK-NEXT: vpsel q7, q3, q2
|
||||
; CHECK-NEXT: vcmp.u32 cs, q1, q6
|
||||
; CHECK-NEXT: vmov r4, s28
|
||||
; CHECK-NEXT: vmov r4, r12, d14
|
||||
; CHECK-NEXT: vpsel q6, q3, q2
|
||||
; CHECK-NEXT: vmov.16 q5[0], r4
|
||||
; CHECK-NEXT: vmov r4, s29
|
||||
; CHECK-NEXT: vmov.16 q5[1], r4
|
||||
; CHECK-NEXT: vmov r4, s30
|
||||
; CHECK-NEXT: vmov.16 q5[1], r12
|
||||
; CHECK-NEXT: vmov r4, r12, d15
|
||||
; CHECK-NEXT: vmov.16 q5[2], r4
|
||||
; CHECK-NEXT: vmov r4, s31
|
||||
; CHECK-NEXT: vmov.16 q5[3], r4
|
||||
; CHECK-NEXT: vmov r4, s24
|
||||
; CHECK-NEXT: vmov.16 q5[3], r12
|
||||
; CHECK-NEXT: vmov r4, r12, d12
|
||||
; CHECK-NEXT: vmov.16 q5[4], r4
|
||||
; CHECK-NEXT: vmov r4, s25
|
||||
; CHECK-NEXT: vmov.16 q5[5], r4
|
||||
; CHECK-NEXT: vmov r4, s26
|
||||
; CHECK-NEXT: vmov.16 q5[5], r12
|
||||
; CHECK-NEXT: vmov r4, r12, d13
|
||||
; CHECK-NEXT: vmov.16 q5[6], r4
|
||||
; CHECK-NEXT: vmov r4, s27
|
||||
; CHECK-NEXT: vmov.16 q5[7], r4
|
||||
; CHECK-NEXT: vmov.16 q5[7], r12
|
||||
; CHECK-NEXT: vptt.i16 ne, q5, zr
|
||||
; CHECK-NEXT: vldrht.u16 q5, [r0], #16
|
||||
; CHECK-NEXT: vldrht.u16 q6, [r1], #16
|
||||
|
@ -1685,23 +1663,19 @@ define arm_aapcs_vfpcc void @ssatmul_8ti_q15(i16* nocapture readonly %pSrcA, i16
|
|||
; CHECK-NEXT: vcmp.u32 cs, q1, q5
|
||||
; CHECK-NEXT: vpsel q7, q3, q2
|
||||
; CHECK-NEXT: vcmp.u32 cs, q1, q6
|
||||
; CHECK-NEXT: vmov r4, s28
|
||||
; CHECK-NEXT: vmov r4, r12, d14
|
||||
; CHECK-NEXT: vpsel q6, q3, q2
|
||||
; CHECK-NEXT: vmov.16 q5[0], r4
|
||||
; CHECK-NEXT: vmov r4, s29
|
||||
; CHECK-NEXT: vmov.16 q5[1], r4
|
||||
; CHECK-NEXT: vmov r4, s30
|
||||
; CHECK-NEXT: vmov.16 q5[1], r12
|
||||
; CHECK-NEXT: vmov r4, r12, d15
|
||||
; CHECK-NEXT: vmov.16 q5[2], r4
|
||||
; CHECK-NEXT: vmov r4, s31
|
||||
; CHECK-NEXT: vmov.16 q5[3], r4
|
||||
; CHECK-NEXT: vmov r4, s24
|
||||
; CHECK-NEXT: vmov.16 q5[3], r12
|
||||
; CHECK-NEXT: vmov r4, r12, d12
|
||||
; CHECK-NEXT: vmov.16 q5[4], r4
|
||||
; CHECK-NEXT: vmov r4, s25
|
||||
; CHECK-NEXT: vmov.16 q5[5], r4
|
||||
; CHECK-NEXT: vmov r4, s26
|
||||
; CHECK-NEXT: vmov.16 q5[5], r12
|
||||
; CHECK-NEXT: vmov r4, r12, d13
|
||||
; CHECK-NEXT: vmov.16 q5[6], r4
|
||||
; CHECK-NEXT: vmov r4, s27
|
||||
; CHECK-NEXT: vmov.16 q5[7], r4
|
||||
; CHECK-NEXT: vmov.16 q5[7], r12
|
||||
; CHECK-NEXT: vptt.i16 ne, q5, zr
|
||||
; CHECK-NEXT: vldrht.u16 q5, [r0], #16
|
||||
; CHECK-NEXT: vldrht.u16 q6, [r1], #16
|
||||
|
@ -2601,23 +2575,19 @@ define arm_aapcs_vfpcc void @ssatmul_8t_q7(i8* nocapture readonly %pSrcA, i8* no
|
|||
; CHECK-NEXT: vcmp.u32 cs, q1, q5
|
||||
; CHECK-NEXT: vpsel q7, q3, q2
|
||||
; CHECK-NEXT: vcmp.u32 cs, q1, q6
|
||||
; CHECK-NEXT: vmov r4, s28
|
||||
; CHECK-NEXT: vmov r4, r12, d14
|
||||
; CHECK-NEXT: vpsel q6, q3, q2
|
||||
; CHECK-NEXT: vmov.16 q5[0], r4
|
||||
; CHECK-NEXT: vmov r4, s29
|
||||
; CHECK-NEXT: vmov.16 q5[1], r4
|
||||
; CHECK-NEXT: vmov r4, s30
|
||||
; CHECK-NEXT: vmov.16 q5[1], r12
|
||||
; CHECK-NEXT: vmov r4, r12, d15
|
||||
; CHECK-NEXT: vmov.16 q5[2], r4
|
||||
; CHECK-NEXT: vmov r4, s31
|
||||
; CHECK-NEXT: vmov.16 q5[3], r4
|
||||
; CHECK-NEXT: vmov r4, s24
|
||||
; CHECK-NEXT: vmov.16 q5[3], r12
|
||||
; CHECK-NEXT: vmov r4, r12, d12
|
||||
; CHECK-NEXT: vmov.16 q5[4], r4
|
||||
; CHECK-NEXT: vmov r4, s25
|
||||
; CHECK-NEXT: vmov.16 q5[5], r4
|
||||
; CHECK-NEXT: vmov r4, s26
|
||||
; CHECK-NEXT: vmov.16 q5[5], r12
|
||||
; CHECK-NEXT: vmov r4, r12, d13
|
||||
; CHECK-NEXT: vmov.16 q5[6], r4
|
||||
; CHECK-NEXT: vmov r4, s27
|
||||
; CHECK-NEXT: vmov.16 q5[7], r4
|
||||
; CHECK-NEXT: vmov.16 q5[7], r12
|
||||
; CHECK-NEXT: vptt.i16 ne, q5, zr
|
||||
; CHECK-NEXT: vldrbt.s16 q5, [r0], #8
|
||||
; CHECK-NEXT: vldrbt.s16 q6, [r1], #8
|
||||
|
@ -2688,12 +2658,12 @@ for.cond.cleanup: ; preds = %vector.body, %entry
|
|||
define arm_aapcs_vfpcc void @ssatmul_16t_q7(i8* nocapture readonly %pSrcA, i8* nocapture readonly %pSrcB, i8* noalias nocapture %pDst, i32 %N) {
|
||||
; CHECK-LABEL: ssatmul_16t_q7:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, lr}
|
||||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
|
||||
; CHECK-NEXT: .pad #56
|
||||
; CHECK-NEXT: sub sp, #56
|
||||
; CHECK-NEXT: .pad #48
|
||||
; CHECK-NEXT: sub sp, #48
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: beq.w .LBB18_3
|
||||
; CHECK-NEXT: @ %bb.1: @ %vector.ph
|
||||
|
@ -2704,14 +2674,14 @@ define arm_aapcs_vfpcc void @ssatmul_16t_q7(i8* nocapture readonly %pSrcA, i8* n
|
|||
; CHECK-NEXT: sub.w r12, r12, #16
|
||||
; CHECK-NEXT: mov.w lr, #1
|
||||
; CHECK-NEXT: adr r4, .LCPI18_1
|
||||
; CHECK-NEXT: vmov.i8 q2, #0x0
|
||||
; CHECK-NEXT: movs r5, #0
|
||||
; CHECK-NEXT: add.w lr, lr, r12, lsr #4
|
||||
; CHECK-NEXT: sub.w r12, r3, #1
|
||||
; CHECK-NEXT: vstrw.32 q0, [sp, #32] @ 16-byte Spill
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r4]
|
||||
; CHECK-NEXT: adr r4, .LCPI18_2
|
||||
; CHECK-NEXT: movs r3, #0
|
||||
; CHECK-NEXT: vdup.32 q1, r12
|
||||
; CHECK-NEXT: vmov.i8 q2, #0x0
|
||||
; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r4]
|
||||
; CHECK-NEXT: adr r4, .LCPI18_3
|
||||
|
@ -2721,90 +2691,82 @@ define arm_aapcs_vfpcc void @ssatmul_16t_q7(i8* nocapture readonly %pSrcA, i8* n
|
|||
; CHECK-NEXT: .LBB18_2: @ %vector.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vldrw.u32 q4, [sp, #32] @ 16-byte Reload
|
||||
; CHECK-NEXT: vdup.32 q0, r3
|
||||
; CHECK-NEXT: adds r3, #16
|
||||
; CHECK-NEXT: vdup.32 q0, r5
|
||||
; CHECK-NEXT: adds r5, #16
|
||||
; CHECK-NEXT: vorr q4, q0, q4
|
||||
; CHECK-NEXT: vcmp.u32 cs, q1, q4
|
||||
; CHECK-NEXT: vpsel q4, q3, q2
|
||||
; CHECK-NEXT: vmov r4, s16
|
||||
; CHECK-NEXT: vmov r4, r3, d8
|
||||
; CHECK-NEXT: vmov.16 q7[0], r4
|
||||
; CHECK-NEXT: vmov r4, s17
|
||||
; CHECK-NEXT: vmov.16 q7[1], r4
|
||||
; CHECK-NEXT: vmov r4, s18
|
||||
; CHECK-NEXT: vmov.16 q7[2], r4
|
||||
; CHECK-NEXT: vmov r4, s19
|
||||
; CHECK-NEXT: vmov.16 q7[1], r3
|
||||
; CHECK-NEXT: vmov r3, r4, d9
|
||||
; CHECK-NEXT: vldrw.u32 q4, [sp, #16] @ 16-byte Reload
|
||||
; CHECK-NEXT: vmov.16 q7[2], r3
|
||||
; CHECK-NEXT: vmov.16 q7[3], r4
|
||||
; CHECK-NEXT: vorr q4, q0, q4
|
||||
; CHECK-NEXT: vcmp.u32 cs, q1, q4
|
||||
; CHECK-NEXT: vpsel q4, q3, q2
|
||||
; CHECK-NEXT: vmov r4, s16
|
||||
; CHECK-NEXT: vmov.16 q7[4], r4
|
||||
; CHECK-NEXT: vmov r4, s17
|
||||
; CHECK-NEXT: vmov r3, r4, d8
|
||||
; CHECK-NEXT: vmov.16 q7[4], r3
|
||||
; CHECK-NEXT: vmov.16 q7[5], r4
|
||||
; CHECK-NEXT: vmov r4, s18
|
||||
; CHECK-NEXT: vmov.16 q7[6], r4
|
||||
; CHECK-NEXT: vmov r4, s19
|
||||
; CHECK-NEXT: vmov r3, r4, d9
|
||||
; CHECK-NEXT: vmov.16 q7[6], r3
|
||||
; CHECK-NEXT: vmov.16 q7[7], r4
|
||||
; CHECK-NEXT: vcmp.i16 ne, q7, zr
|
||||
; CHECK-NEXT: vpsel q4, q3, q2
|
||||
; CHECK-NEXT: vmov.u16 r4, q4[0]
|
||||
; CHECK-NEXT: vmov.8 q7[0], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q4[1]
|
||||
; CHECK-NEXT: vmov.8 q7[1], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q4[2]
|
||||
; CHECK-NEXT: vmov.8 q7[2], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q4[3]
|
||||
; CHECK-NEXT: vmov.8 q7[3], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q4[4]
|
||||
; CHECK-NEXT: vmov.8 q7[4], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q4[5]
|
||||
; CHECK-NEXT: vmov.8 q7[5], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q4[6]
|
||||
; CHECK-NEXT: vmov.8 q7[6], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q4[7]
|
||||
; CHECK-NEXT: vmov.u16 r3, q4[0]
|
||||
; CHECK-NEXT: vmov.8 q7[0], r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q4[1]
|
||||
; CHECK-NEXT: vmov.8 q7[1], r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q4[2]
|
||||
; CHECK-NEXT: vmov.8 q7[2], r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q4[3]
|
||||
; CHECK-NEXT: vmov.8 q7[3], r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q4[4]
|
||||
; CHECK-NEXT: vmov.8 q7[4], r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q4[5]
|
||||
; CHECK-NEXT: vmov.8 q7[5], r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q4[6]
|
||||
; CHECK-NEXT: vmov.8 q7[6], r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q4[7]
|
||||
; CHECK-NEXT: vldrw.u32 q4, [sp] @ 16-byte Reload
|
||||
; CHECK-NEXT: vmov.8 q7[7], r4
|
||||
; CHECK-NEXT: vmov.8 q7[7], r3
|
||||
; CHECK-NEXT: vorr q4, q0, q4
|
||||
; CHECK-NEXT: vorr q0, q0, q6
|
||||
; CHECK-NEXT: vcmp.u32 cs, q1, q4
|
||||
; CHECK-NEXT: vpsel q5, q3, q2
|
||||
; CHECK-NEXT: vcmp.u32 cs, q1, q0
|
||||
; CHECK-NEXT: vmov r4, s20
|
||||
; CHECK-NEXT: vmov r3, r4, d10
|
||||
; CHECK-NEXT: vpsel q0, q3, q2
|
||||
; CHECK-NEXT: vmov.16 q4[0], r4
|
||||
; CHECK-NEXT: vmov r4, s21
|
||||
; CHECK-NEXT: vmov.16 q4[0], r3
|
||||
; CHECK-NEXT: vmov.16 q4[1], r4
|
||||
; CHECK-NEXT: vmov r4, s22
|
||||
; CHECK-NEXT: vmov.16 q4[2], r4
|
||||
; CHECK-NEXT: vmov r4, s23
|
||||
; CHECK-NEXT: vmov r3, r4, d11
|
||||
; CHECK-NEXT: vmov.16 q4[2], r3
|
||||
; CHECK-NEXT: vmov.16 q4[3], r4
|
||||
; CHECK-NEXT: vmov r4, s0
|
||||
; CHECK-NEXT: vmov.16 q4[4], r4
|
||||
; CHECK-NEXT: vmov r4, s1
|
||||
; CHECK-NEXT: vmov r3, r4, d0
|
||||
; CHECK-NEXT: vmov.16 q4[4], r3
|
||||
; CHECK-NEXT: vmov.16 q4[5], r4
|
||||
; CHECK-NEXT: vmov r4, s2
|
||||
; CHECK-NEXT: vmov.16 q4[6], r4
|
||||
; CHECK-NEXT: vmov r4, s3
|
||||
; CHECK-NEXT: vmov r3, r4, d1
|
||||
; CHECK-NEXT: vmov.16 q4[6], r3
|
||||
; CHECK-NEXT: vmov.16 q4[7], r4
|
||||
; CHECK-NEXT: vcmp.i16 ne, q4, zr
|
||||
; CHECK-NEXT: vpsel q0, q3, q2
|
||||
; CHECK-NEXT: vmov.u16 r4, q0[0]
|
||||
; CHECK-NEXT: vmov.8 q7[8], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q0[1]
|
||||
; CHECK-NEXT: vmov.8 q7[9], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q0[2]
|
||||
; CHECK-NEXT: vmov.8 q7[10], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q0[3]
|
||||
; CHECK-NEXT: vmov.8 q7[11], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q0[4]
|
||||
; CHECK-NEXT: vmov.8 q7[12], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q0[5]
|
||||
; CHECK-NEXT: vmov.8 q7[13], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q0[6]
|
||||
; CHECK-NEXT: vmov.8 q7[14], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q0[7]
|
||||
; CHECK-NEXT: vmov.8 q7[15], r4
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[0]
|
||||
; CHECK-NEXT: vmov.8 q7[8], r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[1]
|
||||
; CHECK-NEXT: vmov.8 q7[9], r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[2]
|
||||
; CHECK-NEXT: vmov.8 q7[10], r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[3]
|
||||
; CHECK-NEXT: vmov.8 q7[11], r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[4]
|
||||
; CHECK-NEXT: vmov.8 q7[12], r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[5]
|
||||
; CHECK-NEXT: vmov.8 q7[13], r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[6]
|
||||
; CHECK-NEXT: vmov.8 q7[14], r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[7]
|
||||
; CHECK-NEXT: vmov.8 q7[15], r3
|
||||
; CHECK-NEXT: vptt.i8 ne, q7, zr
|
||||
; CHECK-NEXT: vldrbt.u8 q0, [r0], #16
|
||||
; CHECK-NEXT: vldrbt.u8 q4, [r1], #16
|
||||
|
@ -2816,9 +2778,9 @@ define arm_aapcs_vfpcc void @ssatmul_16t_q7(i8* nocapture readonly %pSrcA, i8* n
|
|||
; CHECK-NEXT: vstrbt.8 q0, [r2], #16
|
||||
; CHECK-NEXT: le lr, .LBB18_2
|
||||
; CHECK-NEXT: .LBB18_3: @ %for.cond.cleanup
|
||||
; CHECK-NEXT: add sp, #56
|
||||
; CHECK-NEXT: add sp, #48
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
|
||||
; CHECK-NEXT: pop {r4, pc}
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
; CHECK-NEXT: .p2align 4
|
||||
; CHECK-NEXT: @ %bb.4:
|
||||
; CHECK-NEXT: .LCPI18_0:
|
||||
|
@ -2888,12 +2850,12 @@ for.cond.cleanup: ; preds = %vector.body, %entry
|
|||
define arm_aapcs_vfpcc void @ssatmul_16ti_q7(i8* nocapture readonly %pSrcA, i8* nocapture readonly %pSrcB, i8* noalias nocapture %pDst, i32 %N) {
|
||||
; CHECK-LABEL: ssatmul_16ti_q7:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, lr}
|
||||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
|
||||
; CHECK-NEXT: .pad #56
|
||||
; CHECK-NEXT: sub sp, #56
|
||||
; CHECK-NEXT: .pad #48
|
||||
; CHECK-NEXT: sub sp, #48
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: beq.w .LBB19_3
|
||||
; CHECK-NEXT: @ %bb.1: @ %vector.ph
|
||||
|
@ -2904,14 +2866,14 @@ define arm_aapcs_vfpcc void @ssatmul_16ti_q7(i8* nocapture readonly %pSrcA, i8*
|
|||
; CHECK-NEXT: sub.w r12, r12, #16
|
||||
; CHECK-NEXT: mov.w lr, #1
|
||||
; CHECK-NEXT: adr r4, .LCPI19_1
|
||||
; CHECK-NEXT: vmov.i8 q2, #0x0
|
||||
; CHECK-NEXT: movs r5, #0
|
||||
; CHECK-NEXT: add.w lr, lr, r12, lsr #4
|
||||
; CHECK-NEXT: sub.w r12, r3, #1
|
||||
; CHECK-NEXT: vstrw.32 q0, [sp, #32] @ 16-byte Spill
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r4]
|
||||
; CHECK-NEXT: adr r4, .LCPI19_2
|
||||
; CHECK-NEXT: movs r3, #0
|
||||
; CHECK-NEXT: vdup.32 q1, r12
|
||||
; CHECK-NEXT: vmov.i8 q2, #0x0
|
||||
; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r4]
|
||||
; CHECK-NEXT: adr r4, .LCPI19_3
|
||||
|
@ -2921,90 +2883,82 @@ define arm_aapcs_vfpcc void @ssatmul_16ti_q7(i8* nocapture readonly %pSrcA, i8*
|
|||
; CHECK-NEXT: .LBB19_2: @ %vector.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vldrw.u32 q4, [sp, #32] @ 16-byte Reload
|
||||
; CHECK-NEXT: vdup.32 q0, r3
|
||||
; CHECK-NEXT: adds r3, #16
|
||||
; CHECK-NEXT: vdup.32 q0, r5
|
||||
; CHECK-NEXT: adds r5, #16
|
||||
; CHECK-NEXT: vorr q4, q0, q4
|
||||
; CHECK-NEXT: vcmp.u32 cs, q1, q4
|
||||
; CHECK-NEXT: vpsel q4, q3, q2
|
||||
; CHECK-NEXT: vmov r4, s16
|
||||
; CHECK-NEXT: vmov r4, r3, d8
|
||||
; CHECK-NEXT: vmov.16 q7[0], r4
|
||||
; CHECK-NEXT: vmov r4, s17
|
||||
; CHECK-NEXT: vmov.16 q7[1], r4
|
||||
; CHECK-NEXT: vmov r4, s18
|
||||
; CHECK-NEXT: vmov.16 q7[2], r4
|
||||
; CHECK-NEXT: vmov r4, s19
|
||||
; CHECK-NEXT: vmov.16 q7[1], r3
|
||||
; CHECK-NEXT: vmov r3, r4, d9
|
||||
; CHECK-NEXT: vldrw.u32 q4, [sp, #16] @ 16-byte Reload
|
||||
; CHECK-NEXT: vmov.16 q7[2], r3
|
||||
; CHECK-NEXT: vmov.16 q7[3], r4
|
||||
; CHECK-NEXT: vorr q4, q0, q4
|
||||
; CHECK-NEXT: vcmp.u32 cs, q1, q4
|
||||
; CHECK-NEXT: vpsel q4, q3, q2
|
||||
; CHECK-NEXT: vmov r4, s16
|
||||
; CHECK-NEXT: vmov.16 q7[4], r4
|
||||
; CHECK-NEXT: vmov r4, s17
|
||||
; CHECK-NEXT: vmov r3, r4, d8
|
||||
; CHECK-NEXT: vmov.16 q7[4], r3
|
||||
; CHECK-NEXT: vmov.16 q7[5], r4
|
||||
; CHECK-NEXT: vmov r4, s18
|
||||
; CHECK-NEXT: vmov.16 q7[6], r4
|
||||
; CHECK-NEXT: vmov r4, s19
|
||||
; CHECK-NEXT: vmov r3, r4, d9
|
||||
; CHECK-NEXT: vmov.16 q7[6], r3
|
||||
; CHECK-NEXT: vmov.16 q7[7], r4
|
||||
; CHECK-NEXT: vcmp.i16 ne, q7, zr
|
||||
; CHECK-NEXT: vpsel q4, q3, q2
|
||||
; CHECK-NEXT: vmov.u16 r4, q4[0]
|
||||
; CHECK-NEXT: vmov.8 q7[0], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q4[1]
|
||||
; CHECK-NEXT: vmov.8 q7[1], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q4[2]
|
||||
; CHECK-NEXT: vmov.8 q7[2], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q4[3]
|
||||
; CHECK-NEXT: vmov.8 q7[3], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q4[4]
|
||||
; CHECK-NEXT: vmov.8 q7[4], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q4[5]
|
||||
; CHECK-NEXT: vmov.8 q7[5], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q4[6]
|
||||
; CHECK-NEXT: vmov.8 q7[6], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q4[7]
|
||||
; CHECK-NEXT: vmov.u16 r3, q4[0]
|
||||
; CHECK-NEXT: vmov.8 q7[0], r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q4[1]
|
||||
; CHECK-NEXT: vmov.8 q7[1], r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q4[2]
|
||||
; CHECK-NEXT: vmov.8 q7[2], r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q4[3]
|
||||
; CHECK-NEXT: vmov.8 q7[3], r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q4[4]
|
||||
; CHECK-NEXT: vmov.8 q7[4], r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q4[5]
|
||||
; CHECK-NEXT: vmov.8 q7[5], r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q4[6]
|
||||
; CHECK-NEXT: vmov.8 q7[6], r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q4[7]
|
||||
; CHECK-NEXT: vldrw.u32 q4, [sp] @ 16-byte Reload
|
||||
; CHECK-NEXT: vmov.8 q7[7], r4
|
||||
; CHECK-NEXT: vmov.8 q7[7], r3
|
||||
; CHECK-NEXT: vorr q4, q0, q4
|
||||
; CHECK-NEXT: vorr q0, q0, q6
|
||||
; CHECK-NEXT: vcmp.u32 cs, q1, q4
|
||||
; CHECK-NEXT: vpsel q5, q3, q2
|
||||
; CHECK-NEXT: vcmp.u32 cs, q1, q0
|
||||
; CHECK-NEXT: vmov r4, s20
|
||||
; CHECK-NEXT: vmov r3, r4, d10
|
||||
; CHECK-NEXT: vpsel q0, q3, q2
|
||||
; CHECK-NEXT: vmov.16 q4[0], r4
|
||||
; CHECK-NEXT: vmov r4, s21
|
||||
; CHECK-NEXT: vmov.16 q4[0], r3
|
||||
; CHECK-NEXT: vmov.16 q4[1], r4
|
||||
; CHECK-NEXT: vmov r4, s22
|
||||
; CHECK-NEXT: vmov.16 q4[2], r4
|
||||
; CHECK-NEXT: vmov r4, s23
|
||||
; CHECK-NEXT: vmov r3, r4, d11
|
||||
; CHECK-NEXT: vmov.16 q4[2], r3
|
||||
; CHECK-NEXT: vmov.16 q4[3], r4
|
||||
; CHECK-NEXT: vmov r4, s0
|
||||
; CHECK-NEXT: vmov.16 q4[4], r4
|
||||
; CHECK-NEXT: vmov r4, s1
|
||||
; CHECK-NEXT: vmov r3, r4, d0
|
||||
; CHECK-NEXT: vmov.16 q4[4], r3
|
||||
; CHECK-NEXT: vmov.16 q4[5], r4
|
||||
; CHECK-NEXT: vmov r4, s2
|
||||
; CHECK-NEXT: vmov.16 q4[6], r4
|
||||
; CHECK-NEXT: vmov r4, s3
|
||||
; CHECK-NEXT: vmov r3, r4, d1
|
||||
; CHECK-NEXT: vmov.16 q4[6], r3
|
||||
; CHECK-NEXT: vmov.16 q4[7], r4
|
||||
; CHECK-NEXT: vcmp.i16 ne, q4, zr
|
||||
; CHECK-NEXT: vpsel q0, q3, q2
|
||||
; CHECK-NEXT: vmov.u16 r4, q0[0]
|
||||
; CHECK-NEXT: vmov.8 q7[8], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q0[1]
|
||||
; CHECK-NEXT: vmov.8 q7[9], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q0[2]
|
||||
; CHECK-NEXT: vmov.8 q7[10], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q0[3]
|
||||
; CHECK-NEXT: vmov.8 q7[11], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q0[4]
|
||||
; CHECK-NEXT: vmov.8 q7[12], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q0[5]
|
||||
; CHECK-NEXT: vmov.8 q7[13], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q0[6]
|
||||
; CHECK-NEXT: vmov.8 q7[14], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q0[7]
|
||||
; CHECK-NEXT: vmov.8 q7[15], r4
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[0]
|
||||
; CHECK-NEXT: vmov.8 q7[8], r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[1]
|
||||
; CHECK-NEXT: vmov.8 q7[9], r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[2]
|
||||
; CHECK-NEXT: vmov.8 q7[10], r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[3]
|
||||
; CHECK-NEXT: vmov.8 q7[11], r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[4]
|
||||
; CHECK-NEXT: vmov.8 q7[12], r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[5]
|
||||
; CHECK-NEXT: vmov.8 q7[13], r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[6]
|
||||
; CHECK-NEXT: vmov.8 q7[14], r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[7]
|
||||
; CHECK-NEXT: vmov.8 q7[15], r3
|
||||
; CHECK-NEXT: vptt.i8 ne, q7, zr
|
||||
; CHECK-NEXT: vldrbt.u8 q0, [r0], #16
|
||||
; CHECK-NEXT: vldrbt.u8 q4, [r1], #16
|
||||
|
@ -3016,9 +2970,9 @@ define arm_aapcs_vfpcc void @ssatmul_16ti_q7(i8* nocapture readonly %pSrcA, i8*
|
|||
; CHECK-NEXT: vstrbt.8 q0, [r2], #16
|
||||
; CHECK-NEXT: le lr, .LBB19_2
|
||||
; CHECK-NEXT: .LBB19_3: @ %for.cond.cleanup
|
||||
; CHECK-NEXT: add sp, #56
|
||||
; CHECK-NEXT: add sp, #48
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
|
||||
; CHECK-NEXT: pop {r4, pc}
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
; CHECK-NEXT: .p2align 4
|
||||
; CHECK-NEXT: @ %bb.4:
|
||||
; CHECK-NEXT: .LCPI19_0:
|
||||
|
|
|
@ -36,48 +36,44 @@ define arm_aapcs_vfpcc <2 x i64> @sadd_int64_t(<2 x i64> %src1, <2 x i64> %src2)
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r4, s4
|
||||
; CHECK-NEXT: vmov r5, s0
|
||||
; CHECK-NEXT: adds r2, r2, r3
|
||||
; CHECK-NEXT: vmov r3, s5
|
||||
; CHECK-NEXT: eor.w r12, r1, r0
|
||||
; CHECK-NEXT: adcs r0, r1
|
||||
; CHECK-NEXT: eors r1, r0
|
||||
; CHECK-NEXT: vmov r0, r1, d3
|
||||
; CHECK-NEXT: vmov r2, r3, d1
|
||||
; CHECK-NEXT: adds r2, r2, r0
|
||||
; CHECK-NEXT: eor.w r12, r3, r1
|
||||
; CHECK-NEXT: adc.w r0, r3, r1
|
||||
; CHECK-NEXT: eor.w r1, r3, r0
|
||||
; CHECK-NEXT: vmov r3, r4, d0
|
||||
; CHECK-NEXT: bic.w r1, r1, r12
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov lr, r1, d2
|
||||
; CHECK-NEXT: cset r12, mi
|
||||
; CHECK-NEXT: cmp.w r12, #0
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: asrne r2, r0, #31
|
||||
; CHECK-NEXT: adds r4, r4, r5
|
||||
; CHECK-NEXT: mvn r5, #-2147483648
|
||||
; CHECK-NEXT: eor.w lr, r1, r3
|
||||
; CHECK-NEXT: adcs r3, r1
|
||||
; CHECK-NEXT: eors r1, r3
|
||||
; CHECK-NEXT: bic.w r1, r1, lr
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: cset r1, mi
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: adds.w r3, r3, lr
|
||||
; CHECK-NEXT: eor.w r5, r4, r1
|
||||
; CHECK-NEXT: adcs r1, r4
|
||||
; CHECK-NEXT: eors r4, r1
|
||||
; CHECK-NEXT: bic.w r5, r4, r5
|
||||
; CHECK-NEXT: cmp r5, #0
|
||||
; CHECK-NEXT: cset r5, mi
|
||||
; CHECK-NEXT: cmp r5, #0
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: asrne r4, r3, #31
|
||||
; CHECK-NEXT: asrne r3, r1, #31
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r4, r2
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
|
||||
; CHECK-NEXT: cset r2, mi
|
||||
; CHECK-NEXT: mvn r3, #-2147483648
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: cinv r2, r5, eq
|
||||
; CHECK-NEXT: cinv r2, r3, eq
|
||||
; CHECK-NEXT: cmp.w r12, #0
|
||||
; CHECK-NEXT: csel r0, r2, r0, ne
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: cset r2, mi
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: cinv r2, r5, eq
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: csel r1, r2, r3, ne
|
||||
; CHECK-NEXT: cinv r2, r3, eq
|
||||
; CHECK-NEXT: cmp r5, #0
|
||||
; CHECK-NEXT: csel r1, r2, r1, ne
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
entry:
|
||||
|
@ -120,33 +116,29 @@ define arm_aapcs_vfpcc <2 x i64> @uadd_int64_t(<2 x i64> %src1, <2 x i64> %src2)
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: vmov r0, r1, d3
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r4, s4
|
||||
; CHECK-NEXT: vmov r5, s0
|
||||
; CHECK-NEXT: adds r2, r2, r3
|
||||
; CHECK-NEXT: vmov r3, s5
|
||||
; CHECK-NEXT: adcs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r2, r3, d1
|
||||
; CHECK-NEXT: vmov r4, r5, d0
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adcs r1, r3
|
||||
; CHECK-NEXT: vmov r3, r2, d2
|
||||
; CHECK-NEXT: adcs lr, r12, #0
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: movne.w r2, #-1
|
||||
; CHECK-NEXT: adds r4, r4, r5
|
||||
; CHECK-NEXT: adcs r1, r3
|
||||
; CHECK-NEXT: adcs r3, r12, #0
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: movne.w r4, #-1
|
||||
; CHECK-NEXT: cmp.w lr, #0
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r4, r2
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: movne.w r0, #-1
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: adds r3, r3, r4
|
||||
; CHECK-NEXT: adcs r2, r5
|
||||
; CHECK-NEXT: adcs r5, r12, #0
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: movne.w r3, #-1
|
||||
; CHECK-NEXT: cmp.w lr, #0
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r3, r0
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: movne.w r1, #-1
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
|
||||
; CHECK-NEXT: cmp r5, #0
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: movne.w r2, #-1
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r2, r1
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
entry:
|
||||
%0 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> %src1, <2 x i64> %src2)
|
||||
|
@ -189,46 +181,42 @@ define arm_aapcs_vfpcc <2 x i64> @ssub_int64_t(<2 x i64> %src1, <2 x i64> %src2)
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r4, s4
|
||||
; CHECK-NEXT: vmov r5, s0
|
||||
; CHECK-NEXT: subs r2, r2, r3
|
||||
; CHECK-NEXT: vmov r3, s5
|
||||
; CHECK-NEXT: eor.w r12, r1, r0
|
||||
; CHECK-NEXT: sbc.w r0, r1, r0
|
||||
; CHECK-NEXT: eors r1, r0
|
||||
; CHECK-NEXT: ands.w r1, r1, r12
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r0, r1, d3
|
||||
; CHECK-NEXT: vmov r2, r3, d1
|
||||
; CHECK-NEXT: subs r0, r2, r0
|
||||
; CHECK-NEXT: eor.w r12, r3, r1
|
||||
; CHECK-NEXT: sbc.w r1, r3, r1
|
||||
; CHECK-NEXT: eor.w r2, r3, r1
|
||||
; CHECK-NEXT: vmov r3, r4, d0
|
||||
; CHECK-NEXT: ands.w r2, r2, r12
|
||||
; CHECK-NEXT: vmov lr, r2, d2
|
||||
; CHECK-NEXT: cset r12, mi
|
||||
; CHECK-NEXT: cmp.w r12, #0
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: asrne r2, r0, #31
|
||||
; CHECK-NEXT: subs r4, r5, r4
|
||||
; CHECK-NEXT: mvn r5, #-2147483648
|
||||
; CHECK-NEXT: eor.w lr, r1, r3
|
||||
; CHECK-NEXT: sbc.w r3, r1, r3
|
||||
; CHECK-NEXT: eors r1, r3
|
||||
; CHECK-NEXT: ands.w r1, r1, lr
|
||||
; CHECK-NEXT: asrne r0, r1, #31
|
||||
; CHECK-NEXT: subs.w r3, r3, lr
|
||||
; CHECK-NEXT: eor.w r5, r4, r2
|
||||
; CHECK-NEXT: sbc.w r2, r4, r2
|
||||
; CHECK-NEXT: eors r4, r2
|
||||
; CHECK-NEXT: ands r5, r4
|
||||
; CHECK-NEXT: cset r5, mi
|
||||
; CHECK-NEXT: cmp r5, #0
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: asrne r3, r2, #31
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r3, r0
|
||||
; CHECK-NEXT: cset r0, mi
|
||||
; CHECK-NEXT: mvn r3, #-2147483648
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: cinv r0, r3, eq
|
||||
; CHECK-NEXT: cmp.w r12, #0
|
||||
; CHECK-NEXT: csel r0, r0, r1, ne
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: cset r1, mi
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: asrne r4, r3, #31
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r4, r2
|
||||
; CHECK-NEXT: cset r2, mi
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: cinv r2, r5, eq
|
||||
; CHECK-NEXT: cmp.w r12, #0
|
||||
; CHECK-NEXT: csel r0, r2, r0, ne
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: cset r2, mi
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: cinv r2, r5, eq
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: csel r1, r2, r3, ne
|
||||
; CHECK-NEXT: cinv r1, r3, eq
|
||||
; CHECK-NEXT: cmp r5, #0
|
||||
; CHECK-NEXT: csel r1, r1, r2, ne
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
entry:
|
||||
|
@ -271,35 +259,31 @@ define arm_aapcs_vfpcc <2 x i64> @usub_int64_t(<2 x i64> %src1, <2 x i64> %src2)
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: vmov r0, r1, d3
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r4, s4
|
||||
; CHECK-NEXT: vmov r5, s0
|
||||
; CHECK-NEXT: subs r2, r3, r2
|
||||
; CHECK-NEXT: vmov r3, s5
|
||||
; CHECK-NEXT: sbcs.w r0, r1, r0
|
||||
; CHECK-NEXT: adc r1, r12, #0
|
||||
; CHECK-NEXT: rsbs.w lr, r1, #1
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: movne r2, #0
|
||||
; CHECK-NEXT: subs r4, r5, r4
|
||||
; CHECK-NEXT: sbcs r1, r3
|
||||
; CHECK-NEXT: adc r3, r12, #0
|
||||
; CHECK-NEXT: rsbs.w r3, r3, #1
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: movne r4, #0
|
||||
; CHECK-NEXT: cmp.w lr, #0
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r4, r2
|
||||
; CHECK-NEXT: vmov r2, r3, d1
|
||||
; CHECK-NEXT: vmov r4, r5, d0
|
||||
; CHECK-NEXT: subs r0, r2, r0
|
||||
; CHECK-NEXT: sbcs.w r1, r3, r1
|
||||
; CHECK-NEXT: adc r2, r12, #0
|
||||
; CHECK-NEXT: rsbs.w lr, r2, #1
|
||||
; CHECK-NEXT: vmov r3, r2, d2
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: movne r0, #0
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: subs r3, r4, r3
|
||||
; CHECK-NEXT: sbcs.w r2, r5, r2
|
||||
; CHECK-NEXT: adc r5, r12, #0
|
||||
; CHECK-NEXT: rsbs.w r5, r5, #1
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: movne r3, #0
|
||||
; CHECK-NEXT: cmp.w lr, #0
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r3, r0
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: movne r1, #0
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
|
||||
; CHECK-NEXT: cmp r5, #0
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: movne r2, #0
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r2, r1
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
entry:
|
||||
%0 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> %src1, <2 x i64> %src2)
|
||||
|
|
|
@ -21,41 +21,36 @@ define arm_aapcs_vfpcc void @scatter_inc_minipred_4i32(<4 x i32> %data, i32* %ds
|
|||
define arm_aapcs_vfpcc void @scatter_inc_mini_8i16(<8 x i16> %data, i16* %dst, <8 x i32> %offs) {
|
||||
; CHECK-LABEL: scatter_inc_mini_8i16:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: .save {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: vshl.i32 q1, q1, #1
|
||||
; CHECK-NEXT: vmov.i32 q3, #0x10
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[0]
|
||||
; CHECK-NEXT: vadd.i32 q4, q1, q3
|
||||
; CHECK-NEXT: vshl.i32 q1, q2, #1
|
||||
; CHECK-NEXT: vmov r1, s16
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vmov r0, s17
|
||||
; CHECK-NEXT: vmov.u16 r6, q0[0]
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, q3
|
||||
; CHECK-NEXT: strh r2, [r1]
|
||||
; CHECK-NEXT: vmov r1, r2, d2
|
||||
; CHECK-NEXT: vmov r3, r12, d3
|
||||
; CHECK-NEXT: vshl.i32 q1, q2, #1
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, q3
|
||||
; CHECK-NEXT: vmov r0, lr, d2
|
||||
; CHECK-NEXT: vmov r4, r5, d3
|
||||
; CHECK-NEXT: strh r6, [r1]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[1]
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s18
|
||||
; CHECK-NEXT: strh r1, [r2]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[2]
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s19
|
||||
; CHECK-NEXT: strh r1, [r3]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[3]
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: strh.w r1, [r12]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[4]
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[5]
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[6]
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[7]
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vpop {d8, d9}
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[5]
|
||||
; CHECK-NEXT: strh.w r0, [lr]
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[6]
|
||||
; CHECK-NEXT: strh r0, [r4]
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[7]
|
||||
; CHECK-NEXT: strh r0, [r5]
|
||||
; CHECK-NEXT: pop {r4, r5, r6, pc}
|
||||
%1 = add <8 x i32> %offs, <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
|
||||
%2 = getelementptr inbounds i16, i16* %dst, <8 x i32> %1
|
||||
call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %data, <8 x i16*> %2, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
|
||||
|
@ -65,69 +60,66 @@ define arm_aapcs_vfpcc void @scatter_inc_mini_8i16(<8 x i16> %data, i16* %dst, <
|
|||
define arm_aapcs_vfpcc void @scatter_inc_mini_16i8(<16 x i8> %data, i8* %dst, <16 x i32> %offs) {
|
||||
; CHECK-LABEL: scatter_inc_mini_16i8:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vmov.i32 q5, #0x10
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr}
|
||||
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
|
||||
; CHECK-NEXT: .pad #4
|
||||
; CHECK-NEXT: sub sp, #4
|
||||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vmov.i32 q4, #0x10
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vadd.i32 q4, q1, q5
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[0]
|
||||
; CHECK-NEXT: vmov r1, s16
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, q4
|
||||
; CHECK-NEXT: add r5, sp, #48
|
||||
; CHECK-NEXT: vmov r1, r2, d2
|
||||
; CHECK-NEXT: vadd.i32 q3, q3, r0
|
||||
; CHECK-NEXT: vadd.i32 q2, q2, r0
|
||||
; CHECK-NEXT: vadd.i32 q3, q3, q5
|
||||
; CHECK-NEXT: vadd.i32 q2, q2, q5
|
||||
; CHECK-NEXT: strb r2, [r1]
|
||||
; CHECK-NEXT: add r1, sp, #32
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r1]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[1]
|
||||
; CHECK-NEXT: vmov r3, r12, d3
|
||||
; CHECK-NEXT: vadd.i32 q1, q2, r0
|
||||
; CHECK-NEXT: vadd.i32 q2, q1, q4
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r5]
|
||||
; CHECK-NEXT: vmov lr, r7, d4
|
||||
; CHECK-NEXT: vmov.u8 r6, q0[0]
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vmov r0, s17
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, q5
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s18
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[2]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s19
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[3]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s8
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[4]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s9
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[5]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s10
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[6]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[7]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s12
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[8]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s13
|
||||
; CHECK-NEXT: vmov r0, r8, d5
|
||||
; CHECK-NEXT: vadd.i32 q2, q3, q4
|
||||
; CHECK-NEXT: vmov.u8 r4, q0[4]
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, q4
|
||||
; CHECK-NEXT: vmov.u8 r5, q0[6]
|
||||
; CHECK-NEXT: strb r6, [r1]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[1]
|
||||
; CHECK-NEXT: strb r1, [r2]
|
||||
; CHECK-NEXT: vmov.u8 r6, q0[2]
|
||||
; CHECK-NEXT: vmov r1, r9, d4
|
||||
; CHECK-NEXT: strb r6, [r3]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[3]
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[8]
|
||||
; CHECK-NEXT: strb.w r3, [r12]
|
||||
; CHECK-NEXT: vmov r3, r6, d5
|
||||
; CHECK-NEXT: strb.w r4, [lr]
|
||||
; CHECK-NEXT: vmov.u8 r4, q0[5]
|
||||
; CHECK-NEXT: strb r4, [r7]
|
||||
; CHECK-NEXT: vmov r7, r4, d2
|
||||
; CHECK-NEXT: strb r5, [r0]
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[7]
|
||||
; CHECK-NEXT: strb.w r0, [r8]
|
||||
; CHECK-NEXT: vmov r0, r5, d3
|
||||
; CHECK-NEXT: strb r2, [r1]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[9]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s14
|
||||
; CHECK-NEXT: strb.w r1, [r9]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[10]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s15
|
||||
; CHECK-NEXT: strb r1, [r3]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[11]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: strb r1, [r6]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[12]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: strb r1, [r7]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[13]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: strb r1, [r4]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[14]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[15]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[15]
|
||||
; CHECK-NEXT: strb r0, [r5]
|
||||
; CHECK-NEXT: vpop {d8, d9}
|
||||
; CHECK-NEXT: add sp, #4
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
|
||||
%1 = add <16 x i32> %offs, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
|
||||
%2 = getelementptr inbounds i8, i8* %dst, <16 x i32> %1
|
||||
call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> %data, <16 x i8*> %2, i32 2, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
|
||||
|
|
|
@ -51,37 +51,35 @@ entry:
|
|||
define arm_aapcs_vfpcc void @scaled_v8i16_sext(i16* %base, <8 x i16>* %offptr, <8 x i16> %input) {
|
||||
; CHECK-LABEL: scaled_v8i16_sext:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vldrh.s32 q2, [r1]
|
||||
; CHECK-NEXT: vldrh.s32 q1, [r1, #8]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[0]
|
||||
; CHECK-NEXT: vshl.i32 q2, q2, #1
|
||||
; CHECK-NEXT: .save {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: vldrh.s32 q1, [r1]
|
||||
; CHECK-NEXT: vmov.u16 r6, q0[0]
|
||||
; CHECK-NEXT: vshl.i32 q1, q1, #1
|
||||
; CHECK-NEXT: vadd.i32 q2, q2, r0
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vmov r0, s8
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s9
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[1]
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s10
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[2]
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[3]
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[4]
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[5]
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[6]
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[7]
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: vmov r2, r3, d2
|
||||
; CHECK-NEXT: vmov r12, lr, d3
|
||||
; CHECK-NEXT: vldrh.s32 q1, [r1, #8]
|
||||
; CHECK-NEXT: vshl.i32 q1, q1, #1
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vmov r0, r1, d2
|
||||
; CHECK-NEXT: vmov r4, r5, d3
|
||||
; CHECK-NEXT: strh r6, [r2]
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[1]
|
||||
; CHECK-NEXT: strh r2, [r3]
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[2]
|
||||
; CHECK-NEXT: strh.w r2, [r12]
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[3]
|
||||
; CHECK-NEXT: strh.w r2, [lr]
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[4]
|
||||
; CHECK-NEXT: strh r2, [r0]
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[5]
|
||||
; CHECK-NEXT: strh r0, [r1]
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[6]
|
||||
; CHECK-NEXT: strh r0, [r4]
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[7]
|
||||
; CHECK-NEXT: strh r0, [r5]
|
||||
; CHECK-NEXT: pop {r4, r5, r6, pc}
|
||||
entry:
|
||||
%offs = load <8 x i16>, <8 x i16>* %offptr, align 2
|
||||
%offs.sext = sext <8 x i16> %offs to <8 x i32>
|
||||
|
@ -99,27 +97,23 @@ define arm_aapcs_vfpcc void @scaled_v8f16_sext(i16* %base, <8 x i16>* %offptr, <
|
|||
; CHECK-NEXT: vshl.i32 q2, q1, #1
|
||||
; CHECK-NEXT: vldrh.s32 q1, [r1, #8]
|
||||
; CHECK-NEXT: vadd.i32 q2, q2, r0
|
||||
; CHECK-NEXT: vmov r1, s8
|
||||
; CHECK-NEXT: vshl.i32 q1, q1, #1
|
||||
; CHECK-NEXT: vmov r1, r2, d4
|
||||
; CHECK-NEXT: vstr.16 s0, [r1]
|
||||
; CHECK-NEXT: vmov r1, s9
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vstr.16 s12, [r1]
|
||||
; CHECK-NEXT: vmov r1, s10
|
||||
; CHECK-NEXT: vmovx.f16 s0, s3
|
||||
; CHECK-NEXT: vstr.16 s1, [r1]
|
||||
; CHECK-NEXT: vmov r1, s11
|
||||
; CHECK-NEXT: vstr.16 s12, [r2]
|
||||
; CHECK-NEXT: vmov r1, r2, d5
|
||||
; CHECK-NEXT: vmovx.f16 s8, s1
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vstr.16 s8, [r1]
|
||||
; CHECK-NEXT: vstr.16 s2, [r0]
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vstr.16 s1, [r1]
|
||||
; CHECK-NEXT: vstr.16 s8, [r2]
|
||||
; CHECK-NEXT: vmov r0, r1, d2
|
||||
; CHECK-NEXT: vmovx.f16 s8, s2
|
||||
; CHECK-NEXT: vstr.16 s8, [r0]
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vstr.16 s2, [r0]
|
||||
; CHECK-NEXT: vstr.16 s8, [r1]
|
||||
; CHECK-NEXT: vmov r0, r1, d3
|
||||
; CHECK-NEXT: vmovx.f16 s0, s3
|
||||
; CHECK-NEXT: vstr.16 s3, [r0]
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vstr.16 s0, [r0]
|
||||
; CHECK-NEXT: vstr.16 s0, [r1]
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%offs = load <8 x i16>, <8 x i16>* %offptr, align 2
|
||||
|
@ -180,40 +174,38 @@ entry:
|
|||
define arm_aapcs_vfpcc void @scaled_v8i16_i16_2gep(i16* %base, <8 x i16>* %offptr, <8 x i16> %input) {
|
||||
; CHECK-LABEL: scaled_v8i16_i16_2gep:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vldrh.s32 q1, [r1, #8]
|
||||
; CHECK-NEXT: vldrh.s32 q3, [r1]
|
||||
; CHECK-NEXT: vmov.i32 q2, #0x28
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[0]
|
||||
; CHECK-NEXT: vshl.i32 q1, q1, #1
|
||||
; CHECK-NEXT: vshl.i32 q3, q3, #1
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vadd.i32 q3, q3, r0
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, q2
|
||||
; CHECK-NEXT: vadd.i32 q2, q3, q2
|
||||
; CHECK-NEXT: vmov r0, s8
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s9
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[1]
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s10
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[2]
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[3]
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[4]
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[5]
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[6]
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[7]
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: .save {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: vldrh.s32 q2, [r1]
|
||||
; CHECK-NEXT: vmov.i32 q1, #0x28
|
||||
; CHECK-NEXT: vmov.u16 r6, q0[0]
|
||||
; CHECK-NEXT: vshl.i32 q2, q2, #1
|
||||
; CHECK-NEXT: vadd.i32 q2, q2, r0
|
||||
; CHECK-NEXT: vadd.i32 q2, q2, q1
|
||||
; CHECK-NEXT: vmov r2, r3, d4
|
||||
; CHECK-NEXT: vmov r12, lr, d5
|
||||
; CHECK-NEXT: vldrh.s32 q2, [r1, #8]
|
||||
; CHECK-NEXT: vshl.i32 q2, q2, #1
|
||||
; CHECK-NEXT: vadd.i32 q2, q2, r0
|
||||
; CHECK-NEXT: vadd.i32 q1, q2, q1
|
||||
; CHECK-NEXT: vmov r0, r1, d2
|
||||
; CHECK-NEXT: vmov r4, r5, d3
|
||||
; CHECK-NEXT: strh r6, [r2]
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[1]
|
||||
; CHECK-NEXT: strh r2, [r3]
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[2]
|
||||
; CHECK-NEXT: strh.w r2, [r12]
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[3]
|
||||
; CHECK-NEXT: strh.w r2, [lr]
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[4]
|
||||
; CHECK-NEXT: strh r2, [r0]
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[5]
|
||||
; CHECK-NEXT: strh r0, [r1]
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[6]
|
||||
; CHECK-NEXT: strh r0, [r4]
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[7]
|
||||
; CHECK-NEXT: strh r0, [r5]
|
||||
; CHECK-NEXT: pop {r4, r5, r6, pc}
|
||||
entry:
|
||||
%offs = load <8 x i16>, <8 x i16>* %offptr, align 2
|
||||
%ptrs = getelementptr inbounds i16, i16* %base, <8 x i16> %offs
|
||||
|
|
|
@ -69,35 +69,33 @@ entry:
|
|||
define arm_aapcs_vfpcc void @unscaled_v8i16_sext(i8* %base, <8 x i16>* %offptr, <8 x i16> %input) {
|
||||
; CHECK-LABEL: unscaled_v8i16_sext:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vldrh.s32 q2, [r1]
|
||||
; CHECK-NEXT: vldrh.s32 q1, [r1, #8]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[0]
|
||||
; CHECK-NEXT: vadd.i32 q2, q2, r0
|
||||
; CHECK-NEXT: .save {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: vldrh.s32 q1, [r1]
|
||||
; CHECK-NEXT: vmov.u16 r6, q0[0]
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vmov r0, s8
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s9
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[1]
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s10
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[2]
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[3]
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[4]
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[5]
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[6]
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[7]
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: vmov r2, r3, d2
|
||||
; CHECK-NEXT: vmov r12, lr, d3
|
||||
; CHECK-NEXT: vldrh.s32 q1, [r1, #8]
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vmov r0, r1, d2
|
||||
; CHECK-NEXT: vmov r4, r5, d3
|
||||
; CHECK-NEXT: strh r6, [r2]
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[1]
|
||||
; CHECK-NEXT: strh r2, [r3]
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[2]
|
||||
; CHECK-NEXT: strh.w r2, [r12]
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[3]
|
||||
; CHECK-NEXT: strh.w r2, [lr]
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[4]
|
||||
; CHECK-NEXT: strh r2, [r0]
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[5]
|
||||
; CHECK-NEXT: strh r0, [r1]
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[6]
|
||||
; CHECK-NEXT: strh r0, [r4]
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[7]
|
||||
; CHECK-NEXT: strh r0, [r5]
|
||||
; CHECK-NEXT: pop {r4, r5, r6, pc}
|
||||
entry:
|
||||
%offs = load <8 x i16>, <8 x i16>* %offptr, align 2
|
||||
%offs.sext = sext <8 x i16> %offs to <8 x i32>
|
||||
|
@ -116,25 +114,21 @@ define arm_aapcs_vfpcc void @unscaled_v8f16_sext(i8* %base, <8 x i16>* %offptr,
|
|||
; CHECK-NEXT: vmovx.f16 s12, s0
|
||||
; CHECK-NEXT: vadd.i32 q2, q2, r0
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vmov r1, s8
|
||||
; CHECK-NEXT: vmov r1, r2, d4
|
||||
; CHECK-NEXT: vstr.16 s0, [r1]
|
||||
; CHECK-NEXT: vmov r1, s9
|
||||
; CHECK-NEXT: vstr.16 s12, [r1]
|
||||
; CHECK-NEXT: vmov r1, s10
|
||||
; CHECK-NEXT: vstr.16 s1, [r1]
|
||||
; CHECK-NEXT: vmov r1, s11
|
||||
; CHECK-NEXT: vstr.16 s12, [r2]
|
||||
; CHECK-NEXT: vmov r1, r2, d5
|
||||
; CHECK-NEXT: vmovx.f16 s8, s1
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vstr.16 s8, [r1]
|
||||
; CHECK-NEXT: vstr.16 s2, [r0]
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vstr.16 s1, [r1]
|
||||
; CHECK-NEXT: vstr.16 s8, [r2]
|
||||
; CHECK-NEXT: vmov r0, r1, d2
|
||||
; CHECK-NEXT: vmovx.f16 s8, s2
|
||||
; CHECK-NEXT: vstr.16 s8, [r0]
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vstr.16 s3, [r0]
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vstr.16 s2, [r0]
|
||||
; CHECK-NEXT: vstr.16 s8, [r1]
|
||||
; CHECK-NEXT: vmov r0, r1, d3
|
||||
; CHECK-NEXT: vmovx.f16 s0, s3
|
||||
; CHECK-NEXT: vstr.16 s0, [r0]
|
||||
; CHECK-NEXT: vstr.16 s3, [r0]
|
||||
; CHECK-NEXT: vstr.16 s0, [r1]
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%offs = load <8 x i16>, <8 x i16>* %offptr, align 2
|
||||
|
@ -149,35 +143,33 @@ entry:
|
|||
define arm_aapcs_vfpcc void @unscaled_v8i16_noext(i8* %base, <8 x i32>* %offptr, <8 x i16> %input) {
|
||||
; CHECK-LABEL: unscaled_v8i16_noext:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r1]
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r1, #16]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[0]
|
||||
; CHECK-NEXT: vadd.i32 q2, q2, r0
|
||||
; CHECK-NEXT: .save {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r1]
|
||||
; CHECK-NEXT: vmov.u16 r6, q0[0]
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vmov r0, s8
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s9
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[1]
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s10
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[2]
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[3]
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[4]
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[5]
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[6]
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[7]
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: vmov r2, r3, d2
|
||||
; CHECK-NEXT: vmov r12, lr, d3
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r1, #16]
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vmov r0, r1, d2
|
||||
; CHECK-NEXT: vmov r4, r5, d3
|
||||
; CHECK-NEXT: strh r6, [r2]
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[1]
|
||||
; CHECK-NEXT: strh r2, [r3]
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[2]
|
||||
; CHECK-NEXT: strh.w r2, [r12]
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[3]
|
||||
; CHECK-NEXT: strh.w r2, [lr]
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[4]
|
||||
; CHECK-NEXT: strh r2, [r0]
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[5]
|
||||
; CHECK-NEXT: strh r0, [r1]
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[6]
|
||||
; CHECK-NEXT: strh r0, [r4]
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[7]
|
||||
; CHECK-NEXT: strh r0, [r5]
|
||||
; CHECK-NEXT: pop {r4, r5, r6, pc}
|
||||
entry:
|
||||
%offs = load <8 x i32>, <8 x i32>* %offptr, align 4
|
||||
%byte_ptrs = getelementptr inbounds i8, i8* %base, <8 x i32> %offs
|
||||
|
@ -195,25 +187,21 @@ define arm_aapcs_vfpcc void @unscaled_v8f16_noext(i8* %base, <8 x i32>* %offptr,
|
|||
; CHECK-NEXT: vmovx.f16 s12, s0
|
||||
; CHECK-NEXT: vadd.i32 q2, q2, r0
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vmov r1, s8
|
||||
; CHECK-NEXT: vmov r1, r2, d4
|
||||
; CHECK-NEXT: vstr.16 s0, [r1]
|
||||
; CHECK-NEXT: vmov r1, s9
|
||||
; CHECK-NEXT: vstr.16 s12, [r1]
|
||||
; CHECK-NEXT: vmov r1, s10
|
||||
; CHECK-NEXT: vstr.16 s1, [r1]
|
||||
; CHECK-NEXT: vmov r1, s11
|
||||
; CHECK-NEXT: vstr.16 s12, [r2]
|
||||
; CHECK-NEXT: vmov r1, r2, d5
|
||||
; CHECK-NEXT: vmovx.f16 s8, s1
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vstr.16 s8, [r1]
|
||||
; CHECK-NEXT: vstr.16 s2, [r0]
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vstr.16 s1, [r1]
|
||||
; CHECK-NEXT: vstr.16 s8, [r2]
|
||||
; CHECK-NEXT: vmov r0, r1, d2
|
||||
; CHECK-NEXT: vmovx.f16 s8, s2
|
||||
; CHECK-NEXT: vstr.16 s8, [r0]
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vstr.16 s3, [r0]
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vstr.16 s2, [r0]
|
||||
; CHECK-NEXT: vstr.16 s8, [r1]
|
||||
; CHECK-NEXT: vmov r0, r1, d3
|
||||
; CHECK-NEXT: vmovx.f16 s0, s3
|
||||
; CHECK-NEXT: vstr.16 s0, [r0]
|
||||
; CHECK-NEXT: vstr.16 s3, [r0]
|
||||
; CHECK-NEXT: vstr.16 s0, [r1]
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%offs = load <8 x i32>, <8 x i32>* %offptr, align 4
|
||||
|
@ -259,38 +247,36 @@ entry:
|
|||
define arm_aapcs_vfpcc void @trunc_signed_unscaled_i64_i8(i8* %base, <8 x i8>* %offptr, <8 x i64> %input) {
|
||||
; CHECK-LABEL: trunc_signed_unscaled_i64_i8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vldrb.s32 q5, [r1]
|
||||
; CHECK-NEXT: vldrb.s32 q4, [r1, #4]
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vadd.i32 q5, q5, r0
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vldrb.s32 q4, [r1]
|
||||
; CHECK-NEXT: vmov r4, s0
|
||||
; CHECK-NEXT: vadd.i32 q4, q4, r0
|
||||
; CHECK-NEXT: vmov r0, s20
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s21
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s22
|
||||
; CHECK-NEXT: vmov r1, s4
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s23
|
||||
; CHECK-NEXT: vmov r1, s6
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s16
|
||||
; CHECK-NEXT: vmov r1, s8
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s17
|
||||
; CHECK-NEXT: vmov r1, s10
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s18
|
||||
; CHECK-NEXT: vmov r1, s12
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s19
|
||||
; CHECK-NEXT: vmov r1, s14
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: vmov r2, r3, d8
|
||||
; CHECK-NEXT: vmov r12, lr, d9
|
||||
; CHECK-NEXT: vldrb.s32 q4, [r1, #4]
|
||||
; CHECK-NEXT: vadd.i32 q4, q4, r0
|
||||
; CHECK-NEXT: vmov r0, r1, d8
|
||||
; CHECK-NEXT: strh r4, [r2]
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r4, r5, d9
|
||||
; CHECK-NEXT: strh r2, [r3]
|
||||
; CHECK-NEXT: vmov r2, s4
|
||||
; CHECK-NEXT: strh.w r2, [r12]
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: strh.w r2, [lr]
|
||||
; CHECK-NEXT: vmov r2, s8
|
||||
; CHECK-NEXT: strh r2, [r0]
|
||||
; CHECK-NEXT: vmov r0, s10
|
||||
; CHECK-NEXT: strh r0, [r1]
|
||||
; CHECK-NEXT: vmov r0, s12
|
||||
; CHECK-NEXT: strh r0, [r4]
|
||||
; CHECK-NEXT: vmov r0, s14
|
||||
; CHECK-NEXT: strh r0, [r5]
|
||||
; CHECK-NEXT: vpop {d8, d9}
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
entry:
|
||||
%offs = load <8 x i8>, <8 x i8>* %offptr, align 1
|
||||
%offs.sext = sext <8 x i8> %offs to <8 x i32>
|
||||
|
@ -341,35 +327,29 @@ entry:
|
|||
define arm_aapcs_vfpcc void @trunc_signed_unscaled_i32_i8(i8* %base, <8 x i8>* %offptr, <8 x i32> %input) {
|
||||
; CHECK-LABEL: trunc_signed_unscaled_i32_i8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vldrb.s32 q3, [r1]
|
||||
; CHECK-NEXT: vldrb.s32 q2, [r1, #4]
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vadd.i32 q3, q3, r0
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: vldrb.s32 q2, [r1]
|
||||
; CHECK-NEXT: vmov r4, r5, d0
|
||||
; CHECK-NEXT: vadd.i32 q2, q2, r0
|
||||
; CHECK-NEXT: vmov r0, s12
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s13
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s14
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s15
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s8
|
||||
; CHECK-NEXT: vmov r1, s4
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s9
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s10
|
||||
; CHECK-NEXT: vmov r1, s6
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: vmov r1, s7
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: vmov r2, r3, d4
|
||||
; CHECK-NEXT: vmov r12, lr, d5
|
||||
; CHECK-NEXT: vldrb.s32 q2, [r1, #4]
|
||||
; CHECK-NEXT: vadd.i32 q2, q2, r0
|
||||
; CHECK-NEXT: vmov r0, r6, d1
|
||||
; CHECK-NEXT: strh r4, [r2]
|
||||
; CHECK-NEXT: vmov r2, r7, d4
|
||||
; CHECK-NEXT: strh r5, [r3]
|
||||
; CHECK-NEXT: vmov r3, r5, d5
|
||||
; CHECK-NEXT: strh.w r0, [r12]
|
||||
; CHECK-NEXT: vmov r0, r1, d2
|
||||
; CHECK-NEXT: strh.w r6, [lr]
|
||||
; CHECK-NEXT: vmov r6, r4, d3
|
||||
; CHECK-NEXT: strh r0, [r2]
|
||||
; CHECK-NEXT: strh r1, [r7]
|
||||
; CHECK-NEXT: strh r6, [r3]
|
||||
; CHECK-NEXT: strh r4, [r5]
|
||||
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
|
||||
entry:
|
||||
%offs = load <8 x i8>, <8 x i8>* %offptr, align 1
|
||||
%offs.sext = sext <8 x i8> %offs to <8 x i32>
|
||||
|
@ -384,25 +364,23 @@ entry:
|
|||
define arm_aapcs_vfpcc void @trunc_unsigned_unscaled_i32_i8(i8* %base, <8 x i8>* %offptr, <8 x i32> %input) {
|
||||
; CHECK-LABEL: trunc_unsigned_unscaled_i32_i8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov.16 q2[0], r3
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: vmov.16 q2[1], r3
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: vmov.16 q2[2], r3
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: vmov.16 q2[3], r3
|
||||
; CHECK-NEXT: vmov r3, s4
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vmov r4, r5, d0
|
||||
; CHECK-NEXT: vmov.16 q2[0], r4
|
||||
; CHECK-NEXT: vmov lr, r12, d3
|
||||
; CHECK-NEXT: vmov r3, r2, d2
|
||||
; CHECK-NEXT: vldrb.u16 q1, [r1]
|
||||
; CHECK-NEXT: vmov r1, r4, d1
|
||||
; CHECK-NEXT: vmov.16 q2[1], r5
|
||||
; CHECK-NEXT: vmov.16 q2[2], r1
|
||||
; CHECK-NEXT: vmov.16 q2[3], r4
|
||||
; CHECK-NEXT: vmov.16 q2[4], r3
|
||||
; CHECK-NEXT: vmov r3, s5
|
||||
; CHECK-NEXT: vmov.16 q2[5], r3
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: vmov r2, s7
|
||||
; CHECK-NEXT: vmov.16 q2[6], r3
|
||||
; CHECK-NEXT: vldrb.u16 q0, [r1]
|
||||
; CHECK-NEXT: vmov.16 q2[7], r2
|
||||
; CHECK-NEXT: vstrh.16 q2, [r0, q0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: vmov.16 q2[5], r2
|
||||
; CHECK-NEXT: vmov.16 q2[6], lr
|
||||
; CHECK-NEXT: vmov.16 q2[7], r12
|
||||
; CHECK-NEXT: vstrh.16 q2, [r0, q1]
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
entry:
|
||||
%offs = load <8 x i8>, <8 x i8>* %offptr, align 1
|
||||
%offs.zext = zext <8 x i8> %offs to <8 x i32>
|
||||
|
@ -417,35 +395,33 @@ entry:
|
|||
define arm_aapcs_vfpcc void @trunc_signed_unscaled_i16_i8(i8* %base, <8 x i8>* %offptr, <8 x i16> %input) {
|
||||
; CHECK-LABEL: trunc_signed_unscaled_i16_i8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vldrb.s32 q2, [r1]
|
||||
; CHECK-NEXT: vldrb.s32 q1, [r1, #4]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[0]
|
||||
; CHECK-NEXT: vadd.i32 q2, q2, r0
|
||||
; CHECK-NEXT: .save {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: vldrb.s32 q1, [r1]
|
||||
; CHECK-NEXT: vmov.u16 r6, q0[0]
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vmov r0, s8
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s9
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[1]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s10
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[2]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[3]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[4]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[5]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[6]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[7]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: vmov r2, r3, d2
|
||||
; CHECK-NEXT: vmov r12, lr, d3
|
||||
; CHECK-NEXT: vldrb.s32 q1, [r1, #4]
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vmov r0, r1, d2
|
||||
; CHECK-NEXT: vmov r4, r5, d3
|
||||
; CHECK-NEXT: strb r6, [r2]
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[1]
|
||||
; CHECK-NEXT: strb r2, [r3]
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[2]
|
||||
; CHECK-NEXT: strb.w r2, [r12]
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[3]
|
||||
; CHECK-NEXT: strb.w r2, [lr]
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[4]
|
||||
; CHECK-NEXT: strb r2, [r0]
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[5]
|
||||
; CHECK-NEXT: strb r0, [r1]
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[6]
|
||||
; CHECK-NEXT: strb r0, [r4]
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[7]
|
||||
; CHECK-NEXT: strb r0, [r5]
|
||||
; CHECK-NEXT: pop {r4, r5, r6, pc}
|
||||
entry:
|
||||
%offs = load <8 x i8>, <8 x i8>* %offptr, align 1
|
||||
%offs.sext = sext <8 x i8> %offs to <8 x i32>
|
||||
|
|
|
@ -236,24 +236,22 @@ entry:
|
|||
define arm_aapcs_vfpcc void @ext_scaled_i16_i32_2gep(i16* %base, <4 x i32>* %offptr, <4 x i32> %input) {
|
||||
; CHECK-LABEL: ext_scaled_i16_i32_2gep:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r1]
|
||||
; CHECK-NEXT: vmov.i32 q1, #0xa
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov r1, r3, d0
|
||||
; CHECK-NEXT: vshl.i32 q2, q2, #1
|
||||
; CHECK-NEXT: vmov r4, r5, d1
|
||||
; CHECK-NEXT: vadd.i32 q2, q2, r0
|
||||
; CHECK-NEXT: vadd.i32 q1, q2, q1
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r0, r12, d2
|
||||
; CHECK-NEXT: vmov r2, lr, d3
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: strh.w r3, [r12]
|
||||
; CHECK-NEXT: strh r4, [r2]
|
||||
; CHECK-NEXT: strh.w r5, [lr]
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
entry:
|
||||
%offs = load <4 x i32>, <4 x i32>* %offptr, align 4
|
||||
%ptrs = getelementptr inbounds i16, i16* %base, <4 x i32> %offs
|
||||
|
|
|
@ -361,21 +361,19 @@ entry:
|
|||
define arm_aapcs_vfpcc void @trunc_signed_unscaled_i16_i8(i8* %base, <4 x i8>* %offptr, <4 x i16> %input) {
|
||||
; CHECK-LABEL: trunc_signed_unscaled_i16_i8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vldrb.s32 q1, [r1]
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov r1, r3, d0
|
||||
; CHECK-NEXT: vmov r4, r5, d1
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r0, r12, d2
|
||||
; CHECK-NEXT: vmov r2, lr, d3
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: strb.w r3, [r12]
|
||||
; CHECK-NEXT: strb r4, [r2]
|
||||
; CHECK-NEXT: strb.w r5, [lr]
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
entry:
|
||||
%offs = load <4 x i8>, <4 x i8>* %offptr, align 1
|
||||
%offs.sext = sext <4 x i8> %offs to <4 x i32>
|
||||
|
@ -388,21 +386,19 @@ entry:
|
|||
define arm_aapcs_vfpcc void @trunc_unsigned_unscaled_i16_i8(i8* %base, <4 x i8>* %offptr, <4 x i16> %input) {
|
||||
; CHECK-LABEL: trunc_unsigned_unscaled_i16_i8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vldrb.u32 q1, [r1]
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov r1, r3, d0
|
||||
; CHECK-NEXT: vmov r4, r5, d1
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r0, r12, d2
|
||||
; CHECK-NEXT: vmov r2, lr, d3
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: strb.w r3, [r12]
|
||||
; CHECK-NEXT: strb r4, [r2]
|
||||
; CHECK-NEXT: strb.w r5, [lr]
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
entry:
|
||||
%offs = load <4 x i8>, <4 x i8>* %offptr, align 1
|
||||
%offs.zext = zext <4 x i8> %offs to <4 x i32>
|
||||
|
|
|
@ -20,35 +20,33 @@ entry:
|
|||
define arm_aapcs_vfpcc void @unscaled_v8i8_i8(i8* %base, <8 x i8>* %offptr, <8 x i8> %input) {
|
||||
; CHECK-LABEL: unscaled_v8i8_i8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vldrb.u32 q2, [r1]
|
||||
; CHECK-NEXT: vldrb.u32 q1, [r1, #4]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[0]
|
||||
; CHECK-NEXT: vadd.i32 q2, q2, r0
|
||||
; CHECK-NEXT: .save {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: vldrb.u32 q1, [r1]
|
||||
; CHECK-NEXT: vmov.u16 r6, q0[0]
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vmov r0, s8
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s9
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[1]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s10
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[2]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[3]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[4]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[5]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[6]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[7]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: vmov r2, r3, d2
|
||||
; CHECK-NEXT: vmov r12, lr, d3
|
||||
; CHECK-NEXT: vldrb.u32 q1, [r1, #4]
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vmov r0, r1, d2
|
||||
; CHECK-NEXT: vmov r4, r5, d3
|
||||
; CHECK-NEXT: strb r6, [r2]
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[1]
|
||||
; CHECK-NEXT: strb r2, [r3]
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[2]
|
||||
; CHECK-NEXT: strb.w r2, [r12]
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[3]
|
||||
; CHECK-NEXT: strb.w r2, [lr]
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[4]
|
||||
; CHECK-NEXT: strb r2, [r0]
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[5]
|
||||
; CHECK-NEXT: strb r0, [r1]
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[6]
|
||||
; CHECK-NEXT: strb r0, [r4]
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[7]
|
||||
; CHECK-NEXT: strb r0, [r5]
|
||||
; CHECK-NEXT: pop {r4, r5, r6, pc}
|
||||
entry:
|
||||
%offs = load <8 x i8>, <8 x i8>* %offptr, align 1
|
||||
%offs.zext = zext <8 x i8> %offs to <8 x i32>
|
||||
|
@ -85,66 +83,57 @@ entry:
|
|||
define arm_aapcs_vfpcc void @unscaled_v16i8_sext(i8* %base, <16 x i8>* %offptr, <16 x i8> %input) {
|
||||
; CHECK-LABEL: unscaled_v16i8_sext:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vldrb.s32 q4, [r1]
|
||||
; CHECK-NEXT: vldrb.s32 q1, [r1, #12]
|
||||
; CHECK-NEXT: vldrb.s32 q2, [r1, #8]
|
||||
; CHECK-NEXT: vldrb.s32 q3, [r1, #4]
|
||||
; CHECK-NEXT: vadd.i32 q4, q4, r0
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
|
||||
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr}
|
||||
; CHECK-NEXT: vldrb.s32 q1, [r1]
|
||||
; CHECK-NEXT: vldrb.s32 q3, [r1, #8]
|
||||
; CHECK-NEXT: vmov.u8 r6, q0[0]
|
||||
; CHECK-NEXT: vmov.u8 r5, q0[4]
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vadd.i32 q2, q2, r0
|
||||
; CHECK-NEXT: vadd.i32 q3, q3, r0
|
||||
; CHECK-NEXT: vmov r0, s16
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[0]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s17
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[1]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s18
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[2]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s19
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[3]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s12
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[4]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s13
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[5]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s14
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[6]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s15
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[7]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s8
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[8]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s9
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[9]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s10
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[10]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[11]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r2, r3, d2
|
||||
; CHECK-NEXT: vmov.u8 r7, q0[6]
|
||||
; CHECK-NEXT: vmov r12, lr, d3
|
||||
; CHECK-NEXT: vldrb.s32 q1, [r1, #4]
|
||||
; CHECK-NEXT: vadd.i32 q2, q1, r0
|
||||
; CHECK-NEXT: vldrb.s32 q1, [r1, #12]
|
||||
; CHECK-NEXT: vmov r4, r8, d4
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vmov r0, r9, d5
|
||||
; CHECK-NEXT: strb r6, [r2]
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[1]
|
||||
; CHECK-NEXT: strb r2, [r3]
|
||||
; CHECK-NEXT: vmov.u8 r6, q0[2]
|
||||
; CHECK-NEXT: vmov r2, r10, d6
|
||||
; CHECK-NEXT: strb.w r6, [r12]
|
||||
; CHECK-NEXT: vmov.u8 r6, q0[3]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[8]
|
||||
; CHECK-NEXT: strb.w r6, [lr]
|
||||
; CHECK-NEXT: vmov r6, r1, d7
|
||||
; CHECK-NEXT: strb r5, [r4]
|
||||
; CHECK-NEXT: vmov.u8 r5, q0[5]
|
||||
; CHECK-NEXT: strb.w r5, [r8]
|
||||
; CHECK-NEXT: vmov r5, r4, d2
|
||||
; CHECK-NEXT: strb r7, [r0]
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[7]
|
||||
; CHECK-NEXT: strb.w r0, [r9]
|
||||
; CHECK-NEXT: vmov r0, r7, d3
|
||||
; CHECK-NEXT: strb r3, [r2]
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[9]
|
||||
; CHECK-NEXT: strb.w r2, [r10]
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[10]
|
||||
; CHECK-NEXT: strb r2, [r6]
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[11]
|
||||
; CHECK-NEXT: strb r2, [r1]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[12]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: strb r1, [r5]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[13]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: strb r1, [r4]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[14]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[15]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vpop {d8, d9}
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[15]
|
||||
; CHECK-NEXT: strb r0, [r7]
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc}
|
||||
entry:
|
||||
%offs = load <16 x i8>, <16 x i8>* %offptr, align 1
|
||||
%offs.sext = sext <16 x i8> %offs to <16 x i32>
|
||||
|
@ -157,66 +146,57 @@ entry:
|
|||
define arm_aapcs_vfpcc void @unscaled_v16i8_i16(i8* %base, <16 x i16>* %offptr, <16 x i8> %input) {
|
||||
; CHECK-LABEL: unscaled_v16i8_i16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vldrh.s32 q4, [r1]
|
||||
; CHECK-NEXT: vldrh.s32 q1, [r1, #24]
|
||||
; CHECK-NEXT: vldrh.s32 q2, [r1, #16]
|
||||
; CHECK-NEXT: vldrh.s32 q3, [r1, #8]
|
||||
; CHECK-NEXT: vadd.i32 q4, q4, r0
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
|
||||
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr}
|
||||
; CHECK-NEXT: vldrh.s32 q1, [r1]
|
||||
; CHECK-NEXT: vldrh.s32 q3, [r1, #16]
|
||||
; CHECK-NEXT: vmov.u8 r6, q0[0]
|
||||
; CHECK-NEXT: vmov.u8 r5, q0[4]
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vadd.i32 q2, q2, r0
|
||||
; CHECK-NEXT: vadd.i32 q3, q3, r0
|
||||
; CHECK-NEXT: vmov r0, s16
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[0]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s17
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[1]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s18
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[2]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s19
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[3]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s12
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[4]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s13
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[5]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s14
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[6]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s15
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[7]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s8
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[8]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s9
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[9]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s10
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[10]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[11]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r2, r3, d2
|
||||
; CHECK-NEXT: vmov.u8 r7, q0[6]
|
||||
; CHECK-NEXT: vmov r12, lr, d3
|
||||
; CHECK-NEXT: vldrh.s32 q1, [r1, #8]
|
||||
; CHECK-NEXT: vadd.i32 q2, q1, r0
|
||||
; CHECK-NEXT: vldrh.s32 q1, [r1, #24]
|
||||
; CHECK-NEXT: vmov r4, r8, d4
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vmov r0, r9, d5
|
||||
; CHECK-NEXT: strb r6, [r2]
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[1]
|
||||
; CHECK-NEXT: strb r2, [r3]
|
||||
; CHECK-NEXT: vmov.u8 r6, q0[2]
|
||||
; CHECK-NEXT: vmov r2, r10, d6
|
||||
; CHECK-NEXT: strb.w r6, [r12]
|
||||
; CHECK-NEXT: vmov.u8 r6, q0[3]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[8]
|
||||
; CHECK-NEXT: strb.w r6, [lr]
|
||||
; CHECK-NEXT: vmov r6, r1, d7
|
||||
; CHECK-NEXT: strb r5, [r4]
|
||||
; CHECK-NEXT: vmov.u8 r5, q0[5]
|
||||
; CHECK-NEXT: strb.w r5, [r8]
|
||||
; CHECK-NEXT: vmov r5, r4, d2
|
||||
; CHECK-NEXT: strb r7, [r0]
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[7]
|
||||
; CHECK-NEXT: strb.w r0, [r9]
|
||||
; CHECK-NEXT: vmov r0, r7, d3
|
||||
; CHECK-NEXT: strb r3, [r2]
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[9]
|
||||
; CHECK-NEXT: strb.w r2, [r10]
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[10]
|
||||
; CHECK-NEXT: strb r2, [r6]
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[11]
|
||||
; CHECK-NEXT: strb r2, [r1]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[12]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: strb r1, [r5]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[13]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: strb r1, [r4]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[14]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[15]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vpop {d8, d9}
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[15]
|
||||
; CHECK-NEXT: strb r0, [r7]
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc}
|
||||
entry:
|
||||
%offs = load <16 x i16>, <16 x i16>* %offptr, align 2
|
||||
%offs.sext = sext <16 x i16> %offs to <16 x i32>
|
||||
|
@ -229,70 +209,61 @@ entry:
|
|||
define arm_aapcs_vfpcc void @unscaled_v16i8_scaled(i32* %base, <16 x i8>* %offptr, <16 x i8> %input) {
|
||||
; CHECK-LABEL: unscaled_v16i8_scaled:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vldrb.u32 q4, [r1]
|
||||
; CHECK-NEXT: vldrb.u32 q1, [r1, #12]
|
||||
; CHECK-NEXT: vldrb.u32 q2, [r1, #8]
|
||||
; CHECK-NEXT: vldrb.u32 q3, [r1, #4]
|
||||
; CHECK-NEXT: vshl.i32 q4, q4, #2
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
|
||||
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr}
|
||||
; CHECK-NEXT: vldrb.u32 q1, [r1]
|
||||
; CHECK-NEXT: vldrb.u32 q3, [r1, #8]
|
||||
; CHECK-NEXT: vmov.u8 r6, q0[0]
|
||||
; CHECK-NEXT: vmov.u8 r7, q0[4]
|
||||
; CHECK-NEXT: vshl.i32 q1, q1, #2
|
||||
; CHECK-NEXT: vshl.i32 q2, q2, #2
|
||||
; CHECK-NEXT: vshl.i32 q3, q3, #2
|
||||
; CHECK-NEXT: vadd.i32 q4, q4, r0
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vadd.i32 q2, q2, r0
|
||||
; CHECK-NEXT: vadd.i32 q3, q3, r0
|
||||
; CHECK-NEXT: vmov r0, s16
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[0]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s17
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[1]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s18
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[2]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s19
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[3]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s12
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[4]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s13
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[5]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s14
|
||||
; CHECK-NEXT: vmov r2, r3, d2
|
||||
; CHECK-NEXT: vmov r12, lr, d3
|
||||
; CHECK-NEXT: vldrb.u32 q1, [r1, #4]
|
||||
; CHECK-NEXT: vshl.i32 q1, q1, #2
|
||||
; CHECK-NEXT: vadd.i32 q2, q1, r0
|
||||
; CHECK-NEXT: vldrb.u32 q1, [r1, #12]
|
||||
; CHECK-NEXT: vmov r4, r8, d4
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[6]
|
||||
; CHECK-NEXT: vshl.i32 q1, q1, #2
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vmov r0, r9, d5
|
||||
; CHECK-NEXT: strb r6, [r2]
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[1]
|
||||
; CHECK-NEXT: strb r2, [r3]
|
||||
; CHECK-NEXT: vmov.u8 r6, q0[2]
|
||||
; CHECK-NEXT: vmov r2, r10, d6
|
||||
; CHECK-NEXT: strb.w r6, [r12]
|
||||
; CHECK-NEXT: vmov.u8 r6, q0[3]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[8]
|
||||
; CHECK-NEXT: strb.w r6, [lr]
|
||||
; CHECK-NEXT: vmov r6, r5, d7
|
||||
; CHECK-NEXT: strb r7, [r4]
|
||||
; CHECK-NEXT: vmov.u8 r7, q0[5]
|
||||
; CHECK-NEXT: strb.w r7, [r8]
|
||||
; CHECK-NEXT: vmov r7, r4, d2
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s15
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[7]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s8
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[8]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s9
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[9]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s10
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[10]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[11]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[12]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[13]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[14]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[15]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vpop {d8, d9}
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[7]
|
||||
; CHECK-NEXT: strb.w r0, [r9]
|
||||
; CHECK-NEXT: vmov r0, r1, d3
|
||||
; CHECK-NEXT: strb r3, [r2]
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[9]
|
||||
; CHECK-NEXT: strb.w r2, [r10]
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[10]
|
||||
; CHECK-NEXT: strb r2, [r6]
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[11]
|
||||
; CHECK-NEXT: strb r2, [r5]
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[12]
|
||||
; CHECK-NEXT: strb r2, [r7]
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[13]
|
||||
; CHECK-NEXT: strb r2, [r4]
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[14]
|
||||
; CHECK-NEXT: strb r2, [r0]
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[15]
|
||||
; CHECK-NEXT: strb r0, [r1]
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc}
|
||||
entry:
|
||||
%offs = load <16 x i8>, <16 x i8>* %offptr, align 4
|
||||
%offs.zext = zext <16 x i8> %offs to <16 x i32>
|
||||
|
@ -306,66 +277,57 @@ entry:
|
|||
define arm_aapcs_vfpcc void @unscaled_v16i8_i8_next(i8* %base, <16 x i32>* %offptr, <16 x i8> %input) {
|
||||
; CHECK-LABEL: unscaled_v16i8_i8_next:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vldrw.u32 q4, [r1]
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r1, #48]
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r1, #32]
|
||||
; CHECK-NEXT: vldrw.u32 q3, [r1, #16]
|
||||
; CHECK-NEXT: vadd.i32 q4, q4, r0
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
|
||||
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr}
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r1]
|
||||
; CHECK-NEXT: vldrw.u32 q3, [r1, #32]
|
||||
; CHECK-NEXT: vmov.u8 r6, q0[0]
|
||||
; CHECK-NEXT: vmov.u8 r5, q0[4]
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vadd.i32 q2, q2, r0
|
||||
; CHECK-NEXT: vadd.i32 q3, q3, r0
|
||||
; CHECK-NEXT: vmov r0, s16
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[0]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s17
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[1]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s18
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[2]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s19
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[3]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s12
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[4]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s13
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[5]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s14
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[6]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s15
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[7]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s8
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[8]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s9
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[9]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s10
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[10]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[11]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r2, r3, d2
|
||||
; CHECK-NEXT: vmov.u8 r7, q0[6]
|
||||
; CHECK-NEXT: vmov r12, lr, d3
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r1, #16]
|
||||
; CHECK-NEXT: vadd.i32 q2, q1, r0
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r1, #48]
|
||||
; CHECK-NEXT: vmov r4, r8, d4
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vmov r0, r9, d5
|
||||
; CHECK-NEXT: strb r6, [r2]
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[1]
|
||||
; CHECK-NEXT: strb r2, [r3]
|
||||
; CHECK-NEXT: vmov.u8 r6, q0[2]
|
||||
; CHECK-NEXT: vmov r2, r10, d6
|
||||
; CHECK-NEXT: strb.w r6, [r12]
|
||||
; CHECK-NEXT: vmov.u8 r6, q0[3]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[8]
|
||||
; CHECK-NEXT: strb.w r6, [lr]
|
||||
; CHECK-NEXT: vmov r6, r1, d7
|
||||
; CHECK-NEXT: strb r5, [r4]
|
||||
; CHECK-NEXT: vmov.u8 r5, q0[5]
|
||||
; CHECK-NEXT: strb.w r5, [r8]
|
||||
; CHECK-NEXT: vmov r5, r4, d2
|
||||
; CHECK-NEXT: strb r7, [r0]
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[7]
|
||||
; CHECK-NEXT: strb.w r0, [r9]
|
||||
; CHECK-NEXT: vmov r0, r7, d3
|
||||
; CHECK-NEXT: strb r3, [r2]
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[9]
|
||||
; CHECK-NEXT: strb.w r2, [r10]
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[10]
|
||||
; CHECK-NEXT: strb r2, [r6]
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[11]
|
||||
; CHECK-NEXT: strb r2, [r1]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[12]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: strb r1, [r5]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[13]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: strb r1, [r4]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[14]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[15]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vpop {d8, d9}
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[15]
|
||||
; CHECK-NEXT: strb r0, [r7]
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc}
|
||||
entry:
|
||||
%offs = load <16 x i32>, <16 x i32>* %offptr, align 4
|
||||
%ptrs = getelementptr inbounds i8, i8* %base, <16 x i32> %offs
|
||||
|
@ -438,44 +400,38 @@ entry:
|
|||
define arm_aapcs_vfpcc void @trunc_unsigned_unscaled_i32_i8(i8* %base, <16 x i8>* %offptr, <16 x i32> %input) {
|
||||
; CHECK-LABEL: trunc_unsigned_unscaled_i32_i8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov.8 q4[0], r3
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: vmov.8 q4[1], r3
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: vmov.8 q4[2], r3
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: vmov.8 q4[3], r3
|
||||
; CHECK-NEXT: vmov r3, s4
|
||||
; CHECK-NEXT: vmov.8 q4[4], r3
|
||||
; CHECK-NEXT: vmov r3, s5
|
||||
; CHECK-NEXT: vmov.8 q4[5], r3
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: vmov.8 q4[6], r3
|
||||
; CHECK-NEXT: vmov r3, s7
|
||||
; CHECK-NEXT: vmov.8 q4[7], r3
|
||||
; CHECK-NEXT: vmov r3, s8
|
||||
; CHECK-NEXT: vmov.8 q4[8], r3
|
||||
; CHECK-NEXT: vmov r3, s9
|
||||
; CHECK-NEXT: vmov.8 q4[9], r3
|
||||
; CHECK-NEXT: vmov r3, s10
|
||||
; CHECK-NEXT: vmov.8 q4[10], r3
|
||||
; CHECK-NEXT: vmov r3, s11
|
||||
; CHECK-NEXT: vmov.8 q4[11], r3
|
||||
; CHECK-NEXT: vmov r3, s12
|
||||
; CHECK-NEXT: vmov r4, r5, d0
|
||||
; CHECK-NEXT: vmov.8 q4[0], r4
|
||||
; CHECK-NEXT: vmov lr, r12, d7
|
||||
; CHECK-NEXT: vmov r3, r2, d6
|
||||
; CHECK-NEXT: vldrb.u8 q3, [r1]
|
||||
; CHECK-NEXT: vmov r1, r4, d1
|
||||
; CHECK-NEXT: vmov.8 q4[1], r5
|
||||
; CHECK-NEXT: vmov.8 q4[2], r1
|
||||
; CHECK-NEXT: vmov r1, r5, d2
|
||||
; CHECK-NEXT: vmov.8 q4[3], r4
|
||||
; CHECK-NEXT: vmov.8 q4[4], r1
|
||||
; CHECK-NEXT: vmov r1, r4, d3
|
||||
; CHECK-NEXT: vmov.8 q4[5], r5
|
||||
; CHECK-NEXT: vmov.8 q4[6], r1
|
||||
; CHECK-NEXT: vmov r1, r5, d4
|
||||
; CHECK-NEXT: vmov.8 q4[7], r4
|
||||
; CHECK-NEXT: vmov.8 q4[8], r1
|
||||
; CHECK-NEXT: vmov r1, r4, d5
|
||||
; CHECK-NEXT: vmov.8 q4[9], r5
|
||||
; CHECK-NEXT: vmov.8 q4[10], r1
|
||||
; CHECK-NEXT: vmov.8 q4[11], r4
|
||||
; CHECK-NEXT: vmov.8 q4[12], r3
|
||||
; CHECK-NEXT: vmov r3, s13
|
||||
; CHECK-NEXT: vmov.8 q4[13], r3
|
||||
; CHECK-NEXT: vmov r3, s14
|
||||
; CHECK-NEXT: vmov r2, s15
|
||||
; CHECK-NEXT: vmov.8 q4[14], r3
|
||||
; CHECK-NEXT: vldrb.u8 q0, [r1]
|
||||
; CHECK-NEXT: vmov.8 q4[15], r2
|
||||
; CHECK-NEXT: vstrb.8 q4, [r0, q0]
|
||||
; CHECK-NEXT: vmov.8 q4[13], r2
|
||||
; CHECK-NEXT: vmov.8 q4[14], lr
|
||||
; CHECK-NEXT: vmov.8 q4[15], r12
|
||||
; CHECK-NEXT: vstrb.8 q4, [r0, q3]
|
||||
; CHECK-NEXT: vpop {d8, d9}
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
entry:
|
||||
%offs = load <16 x i8>, <16 x i8>* %offptr, align 1
|
||||
%offs.zext = zext <16 x i8> %offs to <16 x i32>
|
||||
|
@ -536,71 +492,65 @@ entry:
|
|||
define arm_aapcs_vfpcc void @unscaled_v16i8_i8_2gep(i8* %base, <16 x i8>* %offptr, <16 x i8> %input) {
|
||||
; CHECK-LABEL: unscaled_v16i8_i8_2gep:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vldrb.s32 q1, [r1, #12]
|
||||
; CHECK-NEXT: vldrb.s32 q2, [r1, #8]
|
||||
; CHECK-NEXT: vldrb.s32 q3, [r1, #4]
|
||||
; CHECK-NEXT: vldrb.s32 q5, [r1]
|
||||
; CHECK-NEXT: vmov.i32 q4, #0x5
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
|
||||
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vldrb.s32 q2, [r1]
|
||||
; CHECK-NEXT: vmov.i32 q1, #0x5
|
||||
; CHECK-NEXT: vldrb.s32 q4, [r1, #8]
|
||||
; CHECK-NEXT: vmov.u8 r6, q0[0]
|
||||
; CHECK-NEXT: vadd.i32 q2, q2, r0
|
||||
; CHECK-NEXT: vadd.i32 q3, q3, r0
|
||||
; CHECK-NEXT: vadd.i32 q5, q5, r0
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, q4
|
||||
; CHECK-NEXT: vadd.i32 q2, q2, q4
|
||||
; CHECK-NEXT: vadd.i32 q3, q3, q4
|
||||
; CHECK-NEXT: vadd.i32 q4, q5, q4
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[0]
|
||||
; CHECK-NEXT: vmov r0, s16
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s17
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[1]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s18
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[2]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s19
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[3]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s12
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[4]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s13
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[5]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s14
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[6]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s15
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[7]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s8
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[8]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s9
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[9]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s10
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[10]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[11]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov.u8 r5, q0[4]
|
||||
; CHECK-NEXT: vadd.i32 q2, q2, q1
|
||||
; CHECK-NEXT: vadd.i32 q4, q4, r0
|
||||
; CHECK-NEXT: vmov r2, r3, d4
|
||||
; CHECK-NEXT: vmov.u8 r7, q0[6]
|
||||
; CHECK-NEXT: vmov r12, lr, d5
|
||||
; CHECK-NEXT: vldrb.s32 q2, [r1, #4]
|
||||
; CHECK-NEXT: vadd.i32 q2, q2, r0
|
||||
; CHECK-NEXT: vadd.i32 q3, q2, q1
|
||||
; CHECK-NEXT: vldrb.s32 q2, [r1, #12]
|
||||
; CHECK-NEXT: vmov r4, r8, d6
|
||||
; CHECK-NEXT: vadd.i32 q2, q2, r0
|
||||
; CHECK-NEXT: vmov r0, r9, d7
|
||||
; CHECK-NEXT: vadd.i32 q3, q4, q1
|
||||
; CHECK-NEXT: vadd.i32 q1, q2, q1
|
||||
; CHECK-NEXT: strb r6, [r2]
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[1]
|
||||
; CHECK-NEXT: strb r2, [r3]
|
||||
; CHECK-NEXT: vmov.u8 r6, q0[2]
|
||||
; CHECK-NEXT: vmov r2, r10, d6
|
||||
; CHECK-NEXT: strb.w r6, [r12]
|
||||
; CHECK-NEXT: vmov.u8 r6, q0[3]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[8]
|
||||
; CHECK-NEXT: strb.w r6, [lr]
|
||||
; CHECK-NEXT: vmov r6, r1, d7
|
||||
; CHECK-NEXT: strb r5, [r4]
|
||||
; CHECK-NEXT: vmov.u8 r5, q0[5]
|
||||
; CHECK-NEXT: strb.w r5, [r8]
|
||||
; CHECK-NEXT: vmov r5, r4, d2
|
||||
; CHECK-NEXT: strb r7, [r0]
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[7]
|
||||
; CHECK-NEXT: strb.w r0, [r9]
|
||||
; CHECK-NEXT: vmov r0, r7, d3
|
||||
; CHECK-NEXT: strb r3, [r2]
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[9]
|
||||
; CHECK-NEXT: strb.w r2, [r10]
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[10]
|
||||
; CHECK-NEXT: strb r2, [r6]
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[11]
|
||||
; CHECK-NEXT: strb r2, [r1]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[12]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: strb r1, [r5]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[13]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: strb r1, [r4]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[14]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[15]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[15]
|
||||
; CHECK-NEXT: strb r0, [r7]
|
||||
; CHECK-NEXT: vpop {d8, d9}
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc}
|
||||
entry:
|
||||
%offs = load <16 x i8>, <16 x i8>* %offptr, align 1
|
||||
%ptrs = getelementptr inbounds i8, i8* %base, <16 x i8> %offs
|
||||
|
|
|
@ -36,33 +36,27 @@ entry:
|
|||
define arm_aapcs_vfpcc void @ptr_v8i32(<8 x i32> %v, <8 x i32*>* %offptr) {
|
||||
; CHECK-LABEL: ptr_v8i32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vldrw.u32 q3, [r0]
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0]
|
||||
; CHECK-NEXT: vmov r3, r4, d0
|
||||
; CHECK-NEXT: vmov r1, r2, d4
|
||||
; CHECK-NEXT: vmov lr, r12, d5
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0, #16]
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov r0, s12
|
||||
; CHECK-NEXT: str r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s13
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: str r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s14
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: str r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s15
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: str r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s8
|
||||
; CHECK-NEXT: vmov r1, s4
|
||||
; CHECK-NEXT: str r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s9
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: str r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s10
|
||||
; CHECK-NEXT: vmov r1, s6
|
||||
; CHECK-NEXT: str r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: vmov r1, s7
|
||||
; CHECK-NEXT: str r1, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: vmov r0, r5, d1
|
||||
; CHECK-NEXT: str r3, [r1]
|
||||
; CHECK-NEXT: vmov r1, r7, d4
|
||||
; CHECK-NEXT: str r4, [r2]
|
||||
; CHECK-NEXT: vmov r2, r4, d5
|
||||
; CHECK-NEXT: str.w r0, [lr]
|
||||
; CHECK-NEXT: vmov r0, r3, d2
|
||||
; CHECK-NEXT: str.w r5, [r12]
|
||||
; CHECK-NEXT: vmov r5, r6, d3
|
||||
; CHECK-NEXT: str r0, [r1]
|
||||
; CHECK-NEXT: str r3, [r7]
|
||||
; CHECK-NEXT: str r5, [r2]
|
||||
; CHECK-NEXT: str r6, [r4]
|
||||
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
|
||||
entry:
|
||||
%offs = load <8 x i32*>, <8 x i32*>* %offptr, align 4
|
||||
call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %v, <8 x i32*> %offs, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
|
||||
|
@ -73,62 +67,51 @@ entry:
|
|||
define arm_aapcs_vfpcc void @ptr_v16i32(<16 x i32> %v, <16 x i32*>* %offptr) {
|
||||
; CHECK-LABEL: ptr_v16i32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
|
||||
; CHECK-NEXT: vldrw.u32 q7, [r0]
|
||||
; CHECK-NEXT: vldrw.u32 q4, [r0, #48]
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: .pad #4
|
||||
; CHECK-NEXT: sub sp, #4
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
|
||||
; CHECK-NEXT: vldrw.u32 q4, [r0]
|
||||
; CHECK-NEXT: vmov r3, r4, d0
|
||||
; CHECK-NEXT: vldrw.u32 q5, [r0, #32]
|
||||
; CHECK-NEXT: vldrw.u32 q6, [r0, #16]
|
||||
; CHECK-NEXT: vmov r0, s28
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: str r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s29
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: str r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s30
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: str r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s31
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: str r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s24
|
||||
; CHECK-NEXT: vmov r1, s4
|
||||
; CHECK-NEXT: str r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s25
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: str r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s26
|
||||
; CHECK-NEXT: vmov r1, s6
|
||||
; CHECK-NEXT: str r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s27
|
||||
; CHECK-NEXT: vmov r1, s7
|
||||
; CHECK-NEXT: str r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s20
|
||||
; CHECK-NEXT: vmov r1, s8
|
||||
; CHECK-NEXT: str r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s21
|
||||
; CHECK-NEXT: vmov r1, s9
|
||||
; CHECK-NEXT: str r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s22
|
||||
; CHECK-NEXT: vmov r1, s10
|
||||
; CHECK-NEXT: str r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s23
|
||||
; CHECK-NEXT: vmov r1, s11
|
||||
; CHECK-NEXT: str r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s16
|
||||
; CHECK-NEXT: vmov r1, s12
|
||||
; CHECK-NEXT: str r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s17
|
||||
; CHECK-NEXT: vmov r1, s13
|
||||
; CHECK-NEXT: str r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s18
|
||||
; CHECK-NEXT: vmov r1, s14
|
||||
; CHECK-NEXT: str r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s19
|
||||
; CHECK-NEXT: vmov r1, s15
|
||||
; CHECK-NEXT: str r1, [r0]
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: vmov r1, r2, d8
|
||||
; CHECK-NEXT: vmov lr, r12, d9
|
||||
; CHECK-NEXT: vldrw.u32 q4, [r0, #48]
|
||||
; CHECK-NEXT: vmov r0, r5, d1
|
||||
; CHECK-NEXT: str r3, [r1]
|
||||
; CHECK-NEXT: vmov r1, r3, d12
|
||||
; CHECK-NEXT: str r4, [r2]
|
||||
; CHECK-NEXT: vmov r2, r7, d13
|
||||
; CHECK-NEXT: str.w r0, [lr]
|
||||
; CHECK-NEXT: vmov r0, r4, d2
|
||||
; CHECK-NEXT: str.w r5, [r12]
|
||||
; CHECK-NEXT: vmov r5, r6, d3
|
||||
; CHECK-NEXT: str r0, [r1]
|
||||
; CHECK-NEXT: vmov r0, r1, d10
|
||||
; CHECK-NEXT: str r4, [r3]
|
||||
; CHECK-NEXT: vmov r3, r4, d11
|
||||
; CHECK-NEXT: str r5, [r2]
|
||||
; CHECK-NEXT: vmov r2, r5, d4
|
||||
; CHECK-NEXT: str r6, [r7]
|
||||
; CHECK-NEXT: vmov r7, r6, d5
|
||||
; CHECK-NEXT: str r2, [r0]
|
||||
; CHECK-NEXT: vmov r0, r2, d8
|
||||
; CHECK-NEXT: str r5, [r1]
|
||||
; CHECK-NEXT: vmov r1, r5, d9
|
||||
; CHECK-NEXT: str r7, [r3]
|
||||
; CHECK-NEXT: vmov r3, r7, d6
|
||||
; CHECK-NEXT: str r6, [r4]
|
||||
; CHECK-NEXT: vmov r6, r4, d7
|
||||
; CHECK-NEXT: str r3, [r0]
|
||||
; CHECK-NEXT: str r7, [r2]
|
||||
; CHECK-NEXT: str r6, [r1]
|
||||
; CHECK-NEXT: str r4, [r5]
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
|
||||
; CHECK-NEXT: add sp, #4
|
||||
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
|
||||
entry:
|
||||
%offs = load <16 x i32*>, <16 x i32*>* %offptr, align 4
|
||||
call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %v, <16 x i32*> %offs, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
|
||||
|
@ -170,24 +153,20 @@ define arm_aapcs_vfpcc void @ptr_v8f32(<8 x float> %v, <8 x float*>* %offptr) {
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0, #16]
|
||||
; CHECK-NEXT: vmov r12, s11
|
||||
; CHECK-NEXT: vmov lr, s10
|
||||
; CHECK-NEXT: vmov r3, s9
|
||||
; CHECK-NEXT: vmov r1, s8
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0]
|
||||
; CHECK-NEXT: vmov r5, s8
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: vmov r2, s10
|
||||
; CHECK-NEXT: vmov r4, s9
|
||||
; CHECK-NEXT: vstr s0, [r5]
|
||||
; CHECK-NEXT: vstr s1, [r4]
|
||||
; CHECK-NEXT: vstr s2, [r2]
|
||||
; CHECK-NEXT: vstr s3, [r0]
|
||||
; CHECK-NEXT: vstr s4, [r1]
|
||||
; CHECK-NEXT: vstr s5, [r3]
|
||||
; CHECK-NEXT: vstr s6, [lr]
|
||||
; CHECK-NEXT: vstr s7, [r12]
|
||||
; CHECK-NEXT: vmov r1, lr, d4
|
||||
; CHECK-NEXT: vmov r3, r12, d5
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0, #16]
|
||||
; CHECK-NEXT: vmov r0, r2, d4
|
||||
; CHECK-NEXT: vmov r4, r5, d5
|
||||
; CHECK-NEXT: vstr s0, [r1]
|
||||
; CHECK-NEXT: vstr s1, [lr]
|
||||
; CHECK-NEXT: vstr s2, [r3]
|
||||
; CHECK-NEXT: vstr s3, [r12]
|
||||
; CHECK-NEXT: vstr s4, [r0]
|
||||
; CHECK-NEXT: vstr s5, [r2]
|
||||
; CHECK-NEXT: vstr s6, [r4]
|
||||
; CHECK-NEXT: vstr s7, [r5]
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
entry:
|
||||
%offs = load <8 x float*>, <8 x float*>* %offptr, align 4
|
||||
|
@ -201,33 +180,31 @@ entry:
|
|||
define arm_aapcs_vfpcc void @ptr_i16(<8 x i16> %v, <8 x i16*>* %offptr) {
|
||||
; CHECK-LABEL: ptr_i16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0]
|
||||
; CHECK-NEXT: .save {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
||||
; CHECK-NEXT: vmov.u16 r6, q0[0]
|
||||
; CHECK-NEXT: vmov r1, r2, d2
|
||||
; CHECK-NEXT: vmov r3, r12, d3
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0, #16]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[0]
|
||||
; CHECK-NEXT: vmov r0, s8
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s9
|
||||
; CHECK-NEXT: vmov r0, lr, d2
|
||||
; CHECK-NEXT: vmov r4, r5, d3
|
||||
; CHECK-NEXT: strh r6, [r1]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[1]
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s10
|
||||
; CHECK-NEXT: strh r1, [r2]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[2]
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: strh r1, [r3]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[3]
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: strh.w r1, [r12]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[4]
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[5]
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[6]
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[7]
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[5]
|
||||
; CHECK-NEXT: strh.w r0, [lr]
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[6]
|
||||
; CHECK-NEXT: strh r0, [r4]
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[7]
|
||||
; CHECK-NEXT: strh r0, [r5]
|
||||
; CHECK-NEXT: pop {r4, r5, r6, pc}
|
||||
entry:
|
||||
%offs = load <8 x i16*>, <8 x i16*>* %offptr, align 4
|
||||
call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %v, <8 x i16*> %offs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
|
||||
|
@ -255,20 +232,18 @@ entry:
|
|||
define arm_aapcs_vfpcc void @ptr_v4i16_trunc(<4 x i32> %v, <4 x i16*>* %offptr) {
|
||||
; CHECK-LABEL: ptr_v4i16_trunc:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r1, r3, d0
|
||||
; CHECK-NEXT: vmov r4, r5, d1
|
||||
; CHECK-NEXT: vmov r0, r12, d2
|
||||
; CHECK-NEXT: vmov r2, lr, d3
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: strh.w r3, [r12]
|
||||
; CHECK-NEXT: strh r4, [r2]
|
||||
; CHECK-NEXT: strh.w r5, [lr]
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
entry:
|
||||
%offs = load <4 x i16*>, <4 x i16*>* %offptr, align 4
|
||||
%ext = trunc <4 x i32> %v to <4 x i16>
|
||||
|
@ -280,33 +255,27 @@ entry:
|
|||
define arm_aapcs_vfpcc void @ptr_v8i16_trunc(<8 x i32> %v, <8 x i16*>* %offptr) {
|
||||
; CHECK-LABEL: ptr_v8i16_trunc:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vldrw.u32 q3, [r0]
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0]
|
||||
; CHECK-NEXT: vmov r3, r4, d0
|
||||
; CHECK-NEXT: vmov r1, r2, d4
|
||||
; CHECK-NEXT: vmov lr, r12, d5
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0, #16]
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov r0, s12
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s13
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s14
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s15
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s8
|
||||
; CHECK-NEXT: vmov r1, s4
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s9
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s10
|
||||
; CHECK-NEXT: vmov r1, s6
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: vmov r1, s7
|
||||
; CHECK-NEXT: strh r1, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: vmov r0, r5, d1
|
||||
; CHECK-NEXT: strh r3, [r1]
|
||||
; CHECK-NEXT: vmov r1, r7, d4
|
||||
; CHECK-NEXT: strh r4, [r2]
|
||||
; CHECK-NEXT: vmov r2, r4, d5
|
||||
; CHECK-NEXT: strh.w r0, [lr]
|
||||
; CHECK-NEXT: vmov r0, r3, d2
|
||||
; CHECK-NEXT: strh.w r5, [r12]
|
||||
; CHECK-NEXT: vmov r5, r6, d3
|
||||
; CHECK-NEXT: strh r0, [r1]
|
||||
; CHECK-NEXT: strh r3, [r7]
|
||||
; CHECK-NEXT: strh r5, [r2]
|
||||
; CHECK-NEXT: strh r6, [r4]
|
||||
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
|
||||
entry:
|
||||
%offs = load <8 x i16*>, <8 x i16*>* %offptr, align 4
|
||||
%ext = trunc <8 x i32> %v to <8 x i16>
|
||||
|
@ -323,25 +292,21 @@ define arm_aapcs_vfpcc void @ptr_f16(<8 x half> %v, <8 x half*>* %offptr) {
|
|||
; CHECK-NEXT: vldrw.u32 q2, [r0]
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0, #16]
|
||||
; CHECK-NEXT: vmovx.f16 s12, s0
|
||||
; CHECK-NEXT: vmov r0, s8
|
||||
; CHECK-NEXT: vmov r0, r1, d4
|
||||
; CHECK-NEXT: vstr.16 s0, [r0]
|
||||
; CHECK-NEXT: vmov r0, s9
|
||||
; CHECK-NEXT: vstr.16 s12, [r0]
|
||||
; CHECK-NEXT: vmov r0, s10
|
||||
; CHECK-NEXT: vstr.16 s1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: vstr.16 s12, [r1]
|
||||
; CHECK-NEXT: vmov r0, r1, d5
|
||||
; CHECK-NEXT: vmovx.f16 s8, s1
|
||||
; CHECK-NEXT: vmovx.f16 s0, s3
|
||||
; CHECK-NEXT: vstr.16 s8, [r0]
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vstr.16 s2, [r0]
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vstr.16 s1, [r0]
|
||||
; CHECK-NEXT: vstr.16 s8, [r1]
|
||||
; CHECK-NEXT: vmov r0, r1, d2
|
||||
; CHECK-NEXT: vmovx.f16 s8, s2
|
||||
; CHECK-NEXT: vstr.16 s8, [r0]
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vstr.16 s2, [r0]
|
||||
; CHECK-NEXT: vstr.16 s8, [r1]
|
||||
; CHECK-NEXT: vmov r0, r1, d3
|
||||
; CHECK-NEXT: vmovx.f16 s0, s3
|
||||
; CHECK-NEXT: vstr.16 s3, [r0]
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vstr.16 s0, [r0]
|
||||
; CHECK-NEXT: vstr.16 s0, [r1]
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%offs = load <8 x half*>, <8 x half*>* %offptr, align 4
|
||||
|
@ -355,62 +320,53 @@ entry:
|
|||
define arm_aapcs_vfpcc void @ptr_i8(<16 x i8> %v, <16 x i8*>* %offptr) {
|
||||
; CHECK-LABEL: ptr_i8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vldrw.u32 q4, [r0]
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr}
|
||||
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0, #16]
|
||||
; CHECK-NEXT: vldrw.u32 q3, [r0, #32]
|
||||
; CHECK-NEXT: vmov.u8 r6, q0[0]
|
||||
; CHECK-NEXT: vmov r1, r2, d2
|
||||
; CHECK-NEXT: vmov.u8 r5, q0[4]
|
||||
; CHECK-NEXT: vmov r3, r12, d3
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0, #48]
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0, #32]
|
||||
; CHECK-NEXT: vldrw.u32 q3, [r0, #16]
|
||||
; CHECK-NEXT: vmov r0, s16
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[0]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s17
|
||||
; CHECK-NEXT: vmov lr, r4, d4
|
||||
; CHECK-NEXT: vmov.u8 r7, q0[6]
|
||||
; CHECK-NEXT: vmov r0, r8, d5
|
||||
; CHECK-NEXT: strb r6, [r1]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[1]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s18
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[2]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s19
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[3]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s12
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[4]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s13
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[5]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s14
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[6]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s15
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[7]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s8
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[8]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s9
|
||||
; CHECK-NEXT: strb r1, [r2]
|
||||
; CHECK-NEXT: vmov.u8 r6, q0[2]
|
||||
; CHECK-NEXT: vmov r1, r9, d6
|
||||
; CHECK-NEXT: strb r6, [r3]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[3]
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[8]
|
||||
; CHECK-NEXT: strb.w r3, [r12]
|
||||
; CHECK-NEXT: vmov r3, r6, d7
|
||||
; CHECK-NEXT: strb.w r5, [lr]
|
||||
; CHECK-NEXT: vmov.u8 r5, q0[5]
|
||||
; CHECK-NEXT: strb r5, [r4]
|
||||
; CHECK-NEXT: vmov r5, r4, d2
|
||||
; CHECK-NEXT: strb r7, [r0]
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[7]
|
||||
; CHECK-NEXT: strb.w r0, [r8]
|
||||
; CHECK-NEXT: vmov r0, r7, d3
|
||||
; CHECK-NEXT: strb r2, [r1]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[9]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s10
|
||||
; CHECK-NEXT: strb.w r1, [r9]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[10]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: strb r1, [r3]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[11]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: strb r1, [r6]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[12]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: strb r1, [r5]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[13]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: strb r1, [r4]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[14]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[15]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vpop {d8, d9}
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[15]
|
||||
; CHECK-NEXT: strb r0, [r7]
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
|
||||
entry:
|
||||
%offs = load <16 x i8*>, <16 x i8*>* %offptr, align 4
|
||||
call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> %v, <16 x i8*> %offs, i32 2, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
|
||||
|
@ -421,33 +377,31 @@ entry:
|
|||
define arm_aapcs_vfpcc void @ptr_v8i8_trunc16(<8 x i16> %v, <8 x i8*>* %offptr) {
|
||||
; CHECK-LABEL: ptr_v8i8_trunc16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0]
|
||||
; CHECK-NEXT: .save {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
||||
; CHECK-NEXT: vmov.u16 r6, q0[0]
|
||||
; CHECK-NEXT: vmov r1, r2, d2
|
||||
; CHECK-NEXT: vmov r3, r12, d3
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0, #16]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[0]
|
||||
; CHECK-NEXT: vmov r0, s8
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s9
|
||||
; CHECK-NEXT: vmov r0, lr, d2
|
||||
; CHECK-NEXT: vmov r4, r5, d3
|
||||
; CHECK-NEXT: strb r6, [r1]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[1]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s10
|
||||
; CHECK-NEXT: strb r1, [r2]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[2]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: strb r1, [r3]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[3]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: strb.w r1, [r12]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[4]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[5]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[6]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[7]
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[5]
|
||||
; CHECK-NEXT: strb.w r0, [lr]
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[6]
|
||||
; CHECK-NEXT: strb r0, [r4]
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[7]
|
||||
; CHECK-NEXT: strb r0, [r5]
|
||||
; CHECK-NEXT: pop {r4, r5, r6, pc}
|
||||
entry:
|
||||
%offs = load <8 x i8*>, <8 x i8*>* %offptr, align 4
|
||||
%ext = trunc <8 x i16> %v to <8 x i8>
|
||||
|
@ -459,20 +413,18 @@ entry:
|
|||
define arm_aapcs_vfpcc void @ptr_v4i8_trunc32(<4 x i32> %v, <4 x i8*>* %offptr) {
|
||||
; CHECK-LABEL: ptr_v4i8_trunc32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r1, r3, d0
|
||||
; CHECK-NEXT: vmov r4, r5, d1
|
||||
; CHECK-NEXT: vmov r0, r12, d2
|
||||
; CHECK-NEXT: vmov r2, lr, d3
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: strb.w r3, [r12]
|
||||
; CHECK-NEXT: strb r4, [r2]
|
||||
; CHECK-NEXT: strb.w r5, [lr]
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
entry:
|
||||
%offs = load <4 x i8*>, <4 x i8*>* %offptr, align 4
|
||||
%ext = trunc <4 x i32> %v to <4 x i8>
|
||||
|
@ -484,33 +436,27 @@ entry:
|
|||
define arm_aapcs_vfpcc void @ptr_v8i8_trunc32(<8 x i32> %v, <8 x i8*>* %offptr) {
|
||||
; CHECK-LABEL: ptr_v8i8_trunc32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vldrw.u32 q3, [r0]
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0]
|
||||
; CHECK-NEXT: vmov r3, r4, d0
|
||||
; CHECK-NEXT: vmov r1, r2, d4
|
||||
; CHECK-NEXT: vmov lr, r12, d5
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0, #16]
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov r0, s12
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s13
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s14
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s15
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s8
|
||||
; CHECK-NEXT: vmov r1, s4
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s9
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s10
|
||||
; CHECK-NEXT: vmov r1, s6
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: vmov r1, s7
|
||||
; CHECK-NEXT: strb r1, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: vmov r0, r5, d1
|
||||
; CHECK-NEXT: strb r3, [r1]
|
||||
; CHECK-NEXT: vmov r1, r7, d4
|
||||
; CHECK-NEXT: strb r4, [r2]
|
||||
; CHECK-NEXT: vmov r2, r4, d5
|
||||
; CHECK-NEXT: strb.w r0, [lr]
|
||||
; CHECK-NEXT: vmov r0, r3, d2
|
||||
; CHECK-NEXT: strb.w r5, [r12]
|
||||
; CHECK-NEXT: vmov r5, r6, d3
|
||||
; CHECK-NEXT: strb r0, [r1]
|
||||
; CHECK-NEXT: strb r3, [r7]
|
||||
; CHECK-NEXT: strb r5, [r2]
|
||||
; CHECK-NEXT: strb r6, [r4]
|
||||
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
|
||||
entry:
|
||||
%offs = load <8 x i8*>, <8 x i8*>* %offptr, align 4
|
||||
%ext = trunc <8 x i32> %v to <8 x i8>
|
||||
|
|
|
@ -78,15 +78,12 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @sext_v2i64_v2i64_v2i35(<2 x i64> %m) {
|
||||
; CHECK-LABEL: sext_v2i64_v2i64_v2i35:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: sbfx r0, r0, #0, #3
|
||||
; CHECK-NEXT: sbfx r1, r1, #0, #3
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
|
||||
; CHECK-NEXT: vmov q0, q1
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: vmov r2, r3, d0
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, r0
|
||||
; CHECK-NEXT: sbfx r0, r1, #0, #3
|
||||
; CHECK-NEXT: sbfx r1, r3, #0, #3
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%shl = shl <2 x i64> %m, <i64 29, i64 29>
|
||||
|
@ -480,22 +477,18 @@ define arm_aapcs_vfpcc <8 x i16> @trunc_v8i32_v8i16(<8 x i32> %src) {
|
|||
; CHECK-LABEL: trunc_v8i32_v8i16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov q2, q0
|
||||
; CHECK-NEXT: vmov r0, s8
|
||||
; CHECK-NEXT: vmov r0, r1, d4
|
||||
; CHECK-NEXT: vmov.16 q0[0], r0
|
||||
; CHECK-NEXT: vmov r0, s9
|
||||
; CHECK-NEXT: vmov.16 q0[1], r0
|
||||
; CHECK-NEXT: vmov r0, s10
|
||||
; CHECK-NEXT: vmov.16 q0[1], r1
|
||||
; CHECK-NEXT: vmov r0, r1, d5
|
||||
; CHECK-NEXT: vmov.16 q0[2], r0
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: vmov.16 q0[3], r0
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov.16 q0[3], r1
|
||||
; CHECK-NEXT: vmov r0, r1, d2
|
||||
; CHECK-NEXT: vmov.16 q0[4], r0
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov.16 q0[5], r0
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov.16 q0[5], r1
|
||||
; CHECK-NEXT: vmov r0, r1, d3
|
||||
; CHECK-NEXT: vmov.16 q0[6], r0
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov.16 q0[7], r0
|
||||
; CHECK-NEXT: vmov.16 q0[7], r1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = trunc <8 x i32> %src to <8 x i16>
|
||||
|
@ -508,38 +501,30 @@ define arm_aapcs_vfpcc <16 x i8> @trunc_v16i32_v16i8(<16 x i32> %src) {
|
|||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vmov q4, q0
|
||||
; CHECK-NEXT: vmov r0, s16
|
||||
; CHECK-NEXT: vmov r0, r1, d8
|
||||
; CHECK-NEXT: vmov.8 q0[0], r0
|
||||
; CHECK-NEXT: vmov r0, s17
|
||||
; CHECK-NEXT: vmov.8 q0[1], r0
|
||||
; CHECK-NEXT: vmov r0, s18
|
||||
; CHECK-NEXT: vmov.8 q0[1], r1
|
||||
; CHECK-NEXT: vmov r0, r1, d9
|
||||
; CHECK-NEXT: vmov.8 q0[2], r0
|
||||
; CHECK-NEXT: vmov r0, s19
|
||||
; CHECK-NEXT: vmov.8 q0[3], r0
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov.8 q0[3], r1
|
||||
; CHECK-NEXT: vmov r0, r1, d2
|
||||
; CHECK-NEXT: vmov.8 q0[4], r0
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov.8 q0[5], r0
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov.8 q0[5], r1
|
||||
; CHECK-NEXT: vmov r0, r1, d3
|
||||
; CHECK-NEXT: vmov.8 q0[6], r0
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov.8 q0[7], r0
|
||||
; CHECK-NEXT: vmov r0, s8
|
||||
; CHECK-NEXT: vmov.8 q0[7], r1
|
||||
; CHECK-NEXT: vmov r0, r1, d4
|
||||
; CHECK-NEXT: vmov.8 q0[8], r0
|
||||
; CHECK-NEXT: vmov r0, s9
|
||||
; CHECK-NEXT: vmov.8 q0[9], r0
|
||||
; CHECK-NEXT: vmov r0, s10
|
||||
; CHECK-NEXT: vmov.8 q0[9], r1
|
||||
; CHECK-NEXT: vmov r0, r1, d5
|
||||
; CHECK-NEXT: vmov.8 q0[10], r0
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: vmov.8 q0[11], r0
|
||||
; CHECK-NEXT: vmov r0, s12
|
||||
; CHECK-NEXT: vmov.8 q0[11], r1
|
||||
; CHECK-NEXT: vmov r0, r1, d6
|
||||
; CHECK-NEXT: vmov.8 q0[12], r0
|
||||
; CHECK-NEXT: vmov r0, s13
|
||||
; CHECK-NEXT: vmov.8 q0[13], r0
|
||||
; CHECK-NEXT: vmov r0, s14
|
||||
; CHECK-NEXT: vmov.8 q0[13], r1
|
||||
; CHECK-NEXT: vmov r0, r1, d7
|
||||
; CHECK-NEXT: vmov.8 q0[14], r0
|
||||
; CHECK-NEXT: vmov r0, s15
|
||||
; CHECK-NEXT: vmov.8 q0[15], r0
|
||||
; CHECK-NEXT: vmov.8 q0[15], r1
|
||||
; CHECK-NEXT: vpop {d8, d9}
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
|
|
|
@ -34,17 +34,17 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @shl_qq_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
|
||||
; CHECK-LABEL: shl_qq_int64_t:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r5, lr}
|
||||
; CHECK-NEXT: push {r5, lr}
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r2, r1, d1
|
||||
; CHECK-NEXT: lsll r2, r1, r0
|
||||
; CHECK-NEXT: vmov r12, s4
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: lsll r0, r3, r12
|
||||
; CHECK-NEXT: vmov r3, s4
|
||||
; CHECK-NEXT: vmov r0, r5, d0
|
||||
; CHECK-NEXT: lsll r0, r5, r3
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, r2
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r1
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r5, r1
|
||||
; CHECK-NEXT: pop {r5, pc}
|
||||
entry:
|
||||
%0 = shl <2 x i64> %src1, %src2
|
||||
ret <2 x i64> %0
|
||||
|
@ -91,12 +91,10 @@ define arm_aapcs_vfpcc <2 x i64> @shru_qq_int64_t(<2 x i64> %src1, <2 x i64> %sr
|
|||
; CHECK-NEXT: push {r5, lr}
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: vmov r1, s4
|
||||
; CHECK-NEXT: vmov r5, s3
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: vmov r0, r5, d1
|
||||
; CHECK-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-NEXT: lsll r0, r5, r2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r2, r3, d0
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: lsll r2, r3, r1
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, r0
|
||||
|
@ -144,17 +142,17 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @shrs_qq_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
|
||||
; CHECK-LABEL: shrs_qq_int64_t:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r5, lr}
|
||||
; CHECK-NEXT: push {r5, lr}
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r2, r1, d1
|
||||
; CHECK-NEXT: asrl r2, r1, r0
|
||||
; CHECK-NEXT: vmov r12, s4
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: asrl r0, r3, r12
|
||||
; CHECK-NEXT: vmov r3, s4
|
||||
; CHECK-NEXT: vmov r0, r5, d0
|
||||
; CHECK-NEXT: asrl r0, r5, r3
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, r2
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r1
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r5, r1
|
||||
; CHECK-NEXT: pop {r5, pc}
|
||||
entry:
|
||||
%0 = ashr <2 x i64> %src1, %src2
|
||||
ret <2 x i64> %0
|
||||
|
@ -194,11 +192,9 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @shl_qi_int64_t(<2 x i64> %src1) {
|
||||
; CHECK-LABEL: shl_qi_int64_t:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: vmov r2, r3, d0
|
||||
; CHECK-NEXT: lsll r0, r1, #4
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: lsll r2, r3, #4
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, r0
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r1
|
||||
|
@ -242,11 +238,9 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @shru_qi_int64_t(<2 x i64> %src1) {
|
||||
; CHECK-LABEL: shru_qi_int64_t:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: vmov r2, r3, d0
|
||||
; CHECK-NEXT: lsrl r0, r1, #4
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: lsrl r2, r3, #4
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, r0
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r1
|
||||
|
@ -290,11 +284,9 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @shrs_qi_int64_t(<2 x i64> %src1) {
|
||||
; CHECK-LABEL: shrs_qi_int64_t:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: vmov r2, r3, d0
|
||||
; CHECK-NEXT: asrl r0, r1, #4
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: asrl r2, r3, #4
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, r0
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r1
|
||||
|
@ -344,11 +336,9 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @shl_qr_int64_t(<2 x i64> %src1, i64 %src2) {
|
||||
; CHECK-LABEL: shl_qr_int64_t:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r12, s2
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: vmov r12, r1, d1
|
||||
; CHECK-NEXT: vmov r2, r3, d0
|
||||
; CHECK-NEXT: lsll r12, r1, r0
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: lsll r2, r3, r0
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, r12
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r1
|
||||
|
@ -403,16 +393,16 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @shru_qr_int64_t(<2 x i64> %src1, i64 %src2) {
|
||||
; CHECK-LABEL: shru_qr_int64_t:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: rsb.w r12, r0, #0
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: lsll r2, r1, r12
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: lsll r0, r3, r12
|
||||
; CHECK-NEXT: .save {r5, lr}
|
||||
; CHECK-NEXT: push {r5, lr}
|
||||
; CHECK-NEXT: rsbs r3, r0, #0
|
||||
; CHECK-NEXT: vmov r2, r1, d1
|
||||
; CHECK-NEXT: vmov r0, r5, d0
|
||||
; CHECK-NEXT: lsll r2, r1, r3
|
||||
; CHECK-NEXT: lsll r0, r5, r3
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, r2
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r1
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r5, r1
|
||||
; CHECK-NEXT: pop {r5, pc}
|
||||
entry:
|
||||
%i = insertelement <2 x i64> undef, i64 %src2, i32 0
|
||||
%s = shufflevector <2 x i64> %i, <2 x i64> undef, <2 x i32> zeroinitializer
|
||||
|
@ -463,11 +453,9 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @shrs_qr_int64_t(<2 x i64> %src1, i64 %src2) {
|
||||
; CHECK-LABEL: shrs_qr_int64_t:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r12, s2
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: vmov r12, r1, d1
|
||||
; CHECK-NEXT: vmov r2, r3, d0
|
||||
; CHECK-NEXT: asrl r12, r1, r0
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: asrl r2, r3, r0
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, r12
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r1
|
||||
|
|
|
@ -1678,8 +1678,7 @@ entry:
|
|||
define arm_aapcs_vfpcc i64 @extract_i64_0(<2 x i64> %a) {
|
||||
; CHECK-LABEL: extract_i64_0:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r0, r1, d0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%res = extractelement <2 x i64> %a, i32 0
|
||||
|
@ -1689,8 +1688,7 @@ entry:
|
|||
define arm_aapcs_vfpcc i64 @extract_i64_1(<2 x i64> %a) {
|
||||
; CHECK-LABEL: extract_i64_1:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%res = extractelement <2 x i64> %a, i32 1
|
||||
|
|
|
@ -35,23 +35,19 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @add_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
|
||||
; CHECK-LABEL: add_int64_t:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: adds.w lr, r3, r2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: adc.w r12, r1, r0
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adcs r1, r3
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, lr
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r12
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vmov lr, r12, d3
|
||||
; CHECK-NEXT: vmov r2, r3, d1
|
||||
; CHECK-NEXT: vmov r1, r0, d2
|
||||
; CHECK-NEXT: vmov r4, r5, d0
|
||||
; CHECK-NEXT: adds.w r2, r2, lr
|
||||
; CHECK-NEXT: adc.w r3, r3, r12
|
||||
; CHECK-NEXT: adds r1, r1, r4
|
||||
; CHECK-NEXT: adcs r0, r5
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r1, r2
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r0, r3
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
entry:
|
||||
%0 = add nsw <2 x i64> %src1, %src2
|
||||
ret <2 x i64> %0
|
||||
|
@ -172,23 +168,19 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @sub_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
|
||||
; CHECK-LABEL: sub_int64_t:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov r1, s7
|
||||
; CHECK-NEXT: subs.w lr, r3, r2
|
||||
; CHECK-NEXT: vmov r2, s4
|
||||
; CHECK-NEXT: vmov r3, s5
|
||||
; CHECK-NEXT: sbc.w r12, r1, r0
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: subs r0, r2, r0
|
||||
; CHECK-NEXT: sbc.w r1, r3, r1
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, lr
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r12
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vmov lr, r12, d1
|
||||
; CHECK-NEXT: vmov r2, r3, d3
|
||||
; CHECK-NEXT: vmov r1, r0, d0
|
||||
; CHECK-NEXT: vmov r4, r5, d2
|
||||
; CHECK-NEXT: subs.w r2, r2, lr
|
||||
; CHECK-NEXT: sbc.w r3, r3, r12
|
||||
; CHECK-NEXT: subs r1, r4, r1
|
||||
; CHECK-NEXT: sbc.w r0, r5, r0
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r1, r2
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r0, r3
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
entry:
|
||||
%0 = sub nsw <2 x i64> %src2, %src1
|
||||
ret <2 x i64> %0
|
||||
|
@ -309,25 +301,21 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @mul_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
|
||||
; CHECK-LABEL: mul_int64_t:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r2, s7
|
||||
; CHECK-NEXT: umull r12, r3, r1, r0
|
||||
; CHECK-NEXT: mla lr, r1, r2, r3
|
||||
; CHECK-NEXT: vmov r3, s4
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov r2, s5
|
||||
; CHECK-NEXT: umull r4, r5, r1, r3
|
||||
; CHECK-NEXT: mla r1, r1, r2, r5
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: mla r0, r2, r0, lr
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r4, r12
|
||||
; CHECK-NEXT: mla r1, r2, r3, r1
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: vmov r0, r1, d2
|
||||
; CHECK-NEXT: vmov r2, lr, d0
|
||||
; CHECK-NEXT: vmov r4, r5, d3
|
||||
; CHECK-NEXT: umull r12, r3, r2, r0
|
||||
; CHECK-NEXT: mla r1, r2, r1, r3
|
||||
; CHECK-NEXT: vmov r2, r3, d1
|
||||
; CHECK-NEXT: mla r0, lr, r0, r1
|
||||
; CHECK-NEXT: umull r6, r7, r2, r4
|
||||
; CHECK-NEXT: mla r2, r2, r5, r7
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r12, r6
|
||||
; CHECK-NEXT: mla r2, r3, r4, r2
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r0, r2
|
||||
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
|
||||
entry:
|
||||
%0 = mul nsw <2 x i64> %src1, %src2
|
||||
ret <2 x i64> %0
|
||||
|
|
|
@ -91,21 +91,19 @@ entry:
|
|||
}
|
||||
|
||||
define <2 x i64> @vector_add_i64(<2 x i64> %lhs, <2 x i64> %rhs) {
|
||||
; CHECK-LE-LABEL: vector_add_i64:
|
||||
; CHECK-LE: @ %bb.0: @ %entry
|
||||
; CHECK-LE-NEXT: .save {r7, lr}
|
||||
; CHECK-LE-NEXT: push {r7, lr}
|
||||
; CHECK-LE-NEXT: add.w r12, sp, #8
|
||||
; CHECK-LE-NEXT: vldrw.u32 q0, [r12]
|
||||
; CHECK-LE-NEXT: vmov lr, s0
|
||||
; CHECK-LE-NEXT: vmov r12, s1
|
||||
; CHECK-LE-NEXT: adds.w r0, r0, lr
|
||||
; CHECK-LE-NEXT: vmov lr, s2
|
||||
; CHECK-LE-NEXT: adc.w r1, r1, r12
|
||||
; CHECK-LE-NEXT: vmov r12, s3
|
||||
; CHECK-LE-NEXT: adds.w r2, r2, lr
|
||||
; CHECK-LE-NEXT: adc.w r3, r3, r12
|
||||
; CHECK-LE-NEXT: pop {r7, pc}
|
||||
; CHECK-MVE-LABEL: vector_add_i64:
|
||||
; CHECK-MVE: @ %bb.0: @ %entry
|
||||
; CHECK-MVE-NEXT: .save {r7, lr}
|
||||
; CHECK-MVE-NEXT: push {r7, lr}
|
||||
; CHECK-MVE-NEXT: add.w r12, sp, #8
|
||||
; CHECK-MVE-NEXT: vldrw.u32 q0, [r12]
|
||||
; CHECK-MVE-NEXT: vmov r12, lr, d0
|
||||
; CHECK-MVE-NEXT: adds.w r0, r0, r12
|
||||
; CHECK-MVE-NEXT: adc.w r1, r1, lr
|
||||
; CHECK-MVE-NEXT: vmov r12, lr, d1
|
||||
; CHECK-MVE-NEXT: adds.w r2, r2, r12
|
||||
; CHECK-MVE-NEXT: adc.w r3, r3, lr
|
||||
; CHECK-MVE-NEXT: pop {r7, pc}
|
||||
;
|
||||
; CHECK-BE-LABEL: vector_add_i64:
|
||||
; CHECK-BE: @ %bb.0: @ %entry
|
||||
|
@ -113,15 +111,27 @@ define <2 x i64> @vector_add_i64(<2 x i64> %lhs, <2 x i64> %rhs) {
|
|||
; CHECK-BE-NEXT: push {r7, lr}
|
||||
; CHECK-BE-NEXT: add.w r12, sp, #8
|
||||
; CHECK-BE-NEXT: vldrw.u32 q0, [r12]
|
||||
; CHECK-BE-NEXT: vmov lr, s1
|
||||
; CHECK-BE-NEXT: vmov r12, s0
|
||||
; CHECK-BE-NEXT: vmov r12, lr, d0
|
||||
; CHECK-BE-NEXT: adds.w r1, r1, lr
|
||||
; CHECK-BE-NEXT: vmov lr, s3
|
||||
; CHECK-BE-NEXT: adc.w r0, r0, r12
|
||||
; CHECK-BE-NEXT: vmov r12, s2
|
||||
; CHECK-BE-NEXT: vmov r12, lr, d1
|
||||
; CHECK-BE-NEXT: adds.w r3, r3, lr
|
||||
; CHECK-BE-NEXT: adc.w r2, r2, r12
|
||||
; CHECK-BE-NEXT: pop {r7, pc}
|
||||
;
|
||||
; CHECK-FP-LABEL: vector_add_i64:
|
||||
; CHECK-FP: @ %bb.0: @ %entry
|
||||
; CHECK-FP-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-FP-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-FP-NEXT: add.w r12, sp, #16
|
||||
; CHECK-FP-NEXT: vldrw.u32 q0, [r12]
|
||||
; CHECK-FP-NEXT: vmov r12, lr, d0
|
||||
; CHECK-FP-NEXT: vmov r4, r5, d1
|
||||
; CHECK-FP-NEXT: adds.w r0, r0, r12
|
||||
; CHECK-FP-NEXT: adc.w r1, r1, lr
|
||||
; CHECK-FP-NEXT: adds r2, r2, r4
|
||||
; CHECK-FP-NEXT: adcs r3, r5
|
||||
; CHECK-FP-NEXT: pop {r4, r5, r7, pc}
|
||||
entry:
|
||||
%sum = add <2 x i64> %lhs, %rhs
|
||||
ret <2 x i64> %sum
|
||||
|
@ -338,67 +348,67 @@ entry:
|
|||
define <4 x float> @vector_add_f32(<4 x float> %lhs, <4 x float> %rhs) {
|
||||
; CHECK-MVE-LABEL: vector_add_f32:
|
||||
; CHECK-MVE: @ %bb.0: @ %entry
|
||||
; CHECK-MVE-NEXT: .save {r7, lr}
|
||||
; CHECK-MVE-NEXT: push {r7, lr}
|
||||
; CHECK-MVE-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-MVE-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-MVE-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
|
||||
; CHECK-MVE-NEXT: vpush {d8, d9, d10, d11, d12, d13}
|
||||
; CHECK-MVE-NEXT: vmov d11, r2, r3
|
||||
; CHECK-MVE-NEXT: vmov d10, r0, r1
|
||||
; CHECK-MVE-NEXT: add r1, sp, #56
|
||||
; CHECK-MVE-NEXT: vldrw.u32 q6, [r1]
|
||||
; CHECK-MVE-NEXT: vmov r0, s23
|
||||
; CHECK-MVE-NEXT: vmov r1, s27
|
||||
; CHECK-MVE-NEXT: vmov d13, r2, r3
|
||||
; CHECK-MVE-NEXT: vmov d12, r0, r1
|
||||
; CHECK-MVE-NEXT: add r1, sp, #64
|
||||
; CHECK-MVE-NEXT: vldrw.u32 q5, [r1]
|
||||
; CHECK-MVE-NEXT: vmov r4, r0, d13
|
||||
; CHECK-MVE-NEXT: vmov r5, r1, d11
|
||||
; CHECK-MVE-NEXT: bl __aeabi_fadd
|
||||
; CHECK-MVE-NEXT: vmov s19, r0
|
||||
; CHECK-MVE-NEXT: vmov r0, s22
|
||||
; CHECK-MVE-NEXT: vmov r1, s26
|
||||
; CHECK-MVE-NEXT: mov r0, r4
|
||||
; CHECK-MVE-NEXT: mov r1, r5
|
||||
; CHECK-MVE-NEXT: bl __aeabi_fadd
|
||||
; CHECK-MVE-NEXT: vmov s18, r0
|
||||
; CHECK-MVE-NEXT: vmov r0, s21
|
||||
; CHECK-MVE-NEXT: vmov r1, s25
|
||||
; CHECK-MVE-NEXT: vmov r4, r0, d12
|
||||
; CHECK-MVE-NEXT: vmov r5, r1, d10
|
||||
; CHECK-MVE-NEXT: bl __aeabi_fadd
|
||||
; CHECK-MVE-NEXT: vmov s17, r0
|
||||
; CHECK-MVE-NEXT: vmov r0, s20
|
||||
; CHECK-MVE-NEXT: vmov r1, s24
|
||||
; CHECK-MVE-NEXT: mov r0, r4
|
||||
; CHECK-MVE-NEXT: mov r1, r5
|
||||
; CHECK-MVE-NEXT: bl __aeabi_fadd
|
||||
; CHECK-MVE-NEXT: vmov s16, r0
|
||||
; CHECK-MVE-NEXT: vmov r2, r3, d9
|
||||
; CHECK-MVE-NEXT: vmov r0, r1, d8
|
||||
; CHECK-MVE-NEXT: vpop {d8, d9, d10, d11, d12, d13}
|
||||
; CHECK-MVE-NEXT: pop {r7, pc}
|
||||
; CHECK-MVE-NEXT: pop {r4, r5, r7, pc}
|
||||
;
|
||||
; CHECK-BE-LABEL: vector_add_f32:
|
||||
; CHECK-BE: @ %bb.0: @ %entry
|
||||
; CHECK-BE-NEXT: .save {r7, lr}
|
||||
; CHECK-BE-NEXT: push {r7, lr}
|
||||
; CHECK-BE-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-BE-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-BE-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
|
||||
; CHECK-BE-NEXT: vpush {d8, d9, d10, d11, d12, d13}
|
||||
; CHECK-BE-NEXT: vmov d1, r3, r2
|
||||
; CHECK-BE-NEXT: vmov d0, r1, r0
|
||||
; CHECK-BE-NEXT: add r1, sp, #56
|
||||
; CHECK-BE-NEXT: add r1, sp, #64
|
||||
; CHECK-BE-NEXT: vldrw.u32 q6, [r1]
|
||||
; CHECK-BE-NEXT: vrev64.32 q5, q0
|
||||
; CHECK-BE-NEXT: vmov r0, s23
|
||||
; CHECK-BE-NEXT: vmov r1, s27
|
||||
; CHECK-BE-NEXT: vmov r4, r0, d11
|
||||
; CHECK-BE-NEXT: vmov r5, r1, d13
|
||||
; CHECK-BE-NEXT: bl __aeabi_fadd
|
||||
; CHECK-BE-NEXT: vmov s19, r0
|
||||
; CHECK-BE-NEXT: vmov r0, s22
|
||||
; CHECK-BE-NEXT: vmov r1, s26
|
||||
; CHECK-BE-NEXT: mov r0, r4
|
||||
; CHECK-BE-NEXT: mov r1, r5
|
||||
; CHECK-BE-NEXT: bl __aeabi_fadd
|
||||
; CHECK-BE-NEXT: vmov s18, r0
|
||||
; CHECK-BE-NEXT: vmov r0, s21
|
||||
; CHECK-BE-NEXT: vmov r1, s25
|
||||
; CHECK-BE-NEXT: vmov r4, r0, d10
|
||||
; CHECK-BE-NEXT: vmov r5, r1, d12
|
||||
; CHECK-BE-NEXT: bl __aeabi_fadd
|
||||
; CHECK-BE-NEXT: vmov s17, r0
|
||||
; CHECK-BE-NEXT: vmov r0, s20
|
||||
; CHECK-BE-NEXT: vmov r1, s24
|
||||
; CHECK-BE-NEXT: mov r0, r4
|
||||
; CHECK-BE-NEXT: mov r1, r5
|
||||
; CHECK-BE-NEXT: bl __aeabi_fadd
|
||||
; CHECK-BE-NEXT: vmov s16, r0
|
||||
; CHECK-BE-NEXT: vrev64.32 q0, q4
|
||||
; CHECK-BE-NEXT: vmov r1, r0, d0
|
||||
; CHECK-BE-NEXT: vmov r3, r2, d1
|
||||
; CHECK-BE-NEXT: vpop {d8, d9, d10, d11, d12, d13}
|
||||
; CHECK-BE-NEXT: pop {r7, pc}
|
||||
; CHECK-BE-NEXT: pop {r4, r5, r7, pc}
|
||||
;
|
||||
; CHECK-FP-LABEL: vector_add_f32:
|
||||
; CHECK-FP: @ %bb.0: @ %entry
|
||||
|
|
|
@ -5,42 +5,42 @@
|
|||
define arm_aapcs_vfpcc void @vabd_v4f32(<4 x float> %x, <4 x float> %y, <4 x float>* %z) {
|
||||
; CHECK-MVE-LABEL: vabd_v4f32:
|
||||
; CHECK-MVE: @ %bb.0: @ %entry
|
||||
; CHECK-MVE-NEXT: .save {r4, r5, r6, r7, lr}
|
||||
; CHECK-MVE-NEXT: push {r4, r5, r6, r7, lr}
|
||||
; CHECK-MVE-NEXT: .save {r4, r5, r6, r7, r8, r9, lr}
|
||||
; CHECK-MVE-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
|
||||
; CHECK-MVE-NEXT: .pad #4
|
||||
; CHECK-MVE-NEXT: sub sp, #4
|
||||
; CHECK-MVE-NEXT: .vsave {d8, d9, d10, d11}
|
||||
; CHECK-MVE-NEXT: vpush {d8, d9, d10, d11}
|
||||
; CHECK-MVE-NEXT: vmov q4, q1
|
||||
; CHECK-MVE-NEXT: vmov q5, q0
|
||||
; CHECK-MVE-NEXT: mov r4, r0
|
||||
; CHECK-MVE-NEXT: vmov r0, s20
|
||||
; CHECK-MVE-NEXT: vmov r1, s16
|
||||
; CHECK-MVE-NEXT: mov r8, r0
|
||||
; CHECK-MVE-NEXT: vmov r0, r6, d10
|
||||
; CHECK-MVE-NEXT: vmov r1, r7, d8
|
||||
; CHECK-MVE-NEXT: bl __aeabi_fsub
|
||||
; CHECK-MVE-NEXT: mov r5, r0
|
||||
; CHECK-MVE-NEXT: vmov r0, s21
|
||||
; CHECK-MVE-NEXT: vmov r1, s17
|
||||
; CHECK-MVE-NEXT: mov r9, r0
|
||||
; CHECK-MVE-NEXT: mov r0, r6
|
||||
; CHECK-MVE-NEXT: mov r1, r7
|
||||
; CHECK-MVE-NEXT: bl __aeabi_fsub
|
||||
; CHECK-MVE-NEXT: mov r6, r0
|
||||
; CHECK-MVE-NEXT: vmov r0, s22
|
||||
; CHECK-MVE-NEXT: vmov r1, s18
|
||||
; CHECK-MVE-NEXT: vmov r0, r7, d11
|
||||
; CHECK-MVE-NEXT: vmov r1, r4, d9
|
||||
; CHECK-MVE-NEXT: bl __aeabi_fsub
|
||||
; CHECK-MVE-NEXT: mov r7, r0
|
||||
; CHECK-MVE-NEXT: vmov r0, s23
|
||||
; CHECK-MVE-NEXT: vmov r1, s19
|
||||
; CHECK-MVE-NEXT: mov r5, r0
|
||||
; CHECK-MVE-NEXT: mov r0, r7
|
||||
; CHECK-MVE-NEXT: mov r1, r4
|
||||
; CHECK-MVE-NEXT: bl __aeabi_fsub
|
||||
; CHECK-MVE-NEXT: bic r0, r0, #-2147483648
|
||||
; CHECK-MVE-NEXT: vmov s3, r0
|
||||
; CHECK-MVE-NEXT: bic r0, r7, #-2147483648
|
||||
; CHECK-MVE-NEXT: bic r0, r5, #-2147483648
|
||||
; CHECK-MVE-NEXT: vmov s2, r0
|
||||
; CHECK-MVE-NEXT: bic r0, r6, #-2147483648
|
||||
; CHECK-MVE-NEXT: vmov s1, r0
|
||||
; CHECK-MVE-NEXT: bic r0, r5, #-2147483648
|
||||
; CHECK-MVE-NEXT: bic r0, r9, #-2147483648
|
||||
; CHECK-MVE-NEXT: vmov s0, r0
|
||||
; CHECK-MVE-NEXT: vstrw.32 q0, [r4]
|
||||
; CHECK-MVE-NEXT: vstrw.32 q0, [r8]
|
||||
; CHECK-MVE-NEXT: vpop {d8, d9, d10, d11}
|
||||
; CHECK-MVE-NEXT: add sp, #4
|
||||
; CHECK-MVE-NEXT: pop {r4, r5, r6, r7, pc}
|
||||
; CHECK-MVE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
|
||||
;
|
||||
; CHECK-MVEFP-LABEL: vabd_v4f32:
|
||||
; CHECK-MVEFP: @ %bb.0: @ %entry
|
||||
|
|
|
@ -159,43 +159,35 @@ define arm_aapcs_vfpcc <4 x i32> @vabd_u32(<4 x i32> %src1, <4 x i32> %src2) {
|
|||
; CHECK-NEXT: vmov.f32 s14, s3
|
||||
; CHECK-NEXT: vand q2, q2, q4
|
||||
; CHECK-NEXT: vand q3, q3, q4
|
||||
; CHECK-NEXT: vmov r2, s8
|
||||
; CHECK-NEXT: vmov r3, s12
|
||||
; CHECK-NEXT: vmov r0, s9
|
||||
; CHECK-NEXT: vmov r1, s13
|
||||
; CHECK-NEXT: vmov r0, r1, d4
|
||||
; CHECK-NEXT: vmov r2, r3, d6
|
||||
; CHECK-NEXT: vmov.f32 s6, s5
|
||||
; CHECK-NEXT: vmov.f32 s2, s1
|
||||
; CHECK-NEXT: vand q1, q1, q4
|
||||
; CHECK-NEXT: vand q4, q0, q4
|
||||
; CHECK-NEXT: subs r2, r3, r2
|
||||
; CHECK-NEXT: vmov r3, s4
|
||||
; CHECK-NEXT: sbc.w r0, r1, r0
|
||||
; CHECK-NEXT: add.w r1, r2, r0, asr #31
|
||||
; CHECK-NEXT: vmov r2, s17
|
||||
; CHECK-NEXT: eor.w r12, r1, r0, asr #31
|
||||
; CHECK-NEXT: vmov r0, s16
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: subs r0, r0, r3
|
||||
; CHECK-NEXT: vmov r3, s14
|
||||
; CHECK-NEXT: sbc.w r1, r2, r1
|
||||
; CHECK-NEXT: vmov r2, s10
|
||||
; CHECK-NEXT: subs r0, r2, r0
|
||||
; CHECK-NEXT: sbc.w r1, r3, r1
|
||||
; CHECK-NEXT: add.w r0, r0, r1, asr #31
|
||||
; CHECK-NEXT: eor.w r0, r0, r1, asr #31
|
||||
; CHECK-NEXT: vmov r1, s15
|
||||
; CHECK-NEXT: eor.w r12, r0, r1, asr #31
|
||||
; CHECK-NEXT: vmov r1, r2, d2
|
||||
; CHECK-NEXT: vmov r3, r0, d8
|
||||
; CHECK-NEXT: subs r1, r3, r1
|
||||
; CHECK-NEXT: sbcs r0, r2
|
||||
; CHECK-NEXT: vmov r2, r3, d7
|
||||
; CHECK-NEXT: add.w r1, r1, r0, asr #31
|
||||
; CHECK-NEXT: eor.w r0, r1, r0, asr #31
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, r12
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: subs r2, r3, r2
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: sbc.w r0, r1, r0
|
||||
; CHECK-NEXT: add.w r1, r2, r0, asr #31
|
||||
; CHECK-NEXT: vmov r2, s19
|
||||
; CHECK-NEXT: eor.w r12, r1, r0, asr #31
|
||||
; CHECK-NEXT: vmov r0, s18
|
||||
; CHECK-NEXT: vmov r1, s7
|
||||
; CHECK-NEXT: subs r0, r0, r3
|
||||
; CHECK-NEXT: sbc.w r1, r2, r1
|
||||
; CHECK-NEXT: vmov r0, r1, d5
|
||||
; CHECK-NEXT: subs r0, r2, r0
|
||||
; CHECK-NEXT: sbc.w r1, r3, r1
|
||||
; CHECK-NEXT: add.w r0, r0, r1, asr #31
|
||||
; CHECK-NEXT: eor.w r0, r0, r1, asr #31
|
||||
; CHECK-NEXT: eor.w r12, r0, r1, asr #31
|
||||
; CHECK-NEXT: vmov r1, r2, d3
|
||||
; CHECK-NEXT: vmov r3, r0, d9
|
||||
; CHECK-NEXT: subs r1, r3, r1
|
||||
; CHECK-NEXT: sbcs r0, r2
|
||||
; CHECK-NEXT: add.w r1, r1, r0, asr #31
|
||||
; CHECK-NEXT: eor.w r0, r1, r0, asr #31
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r0, r12
|
||||
; CHECK-NEXT: vpop {d8, d9}
|
||||
; CHECK-NEXT: bx lr
|
||||
|
@ -548,57 +540,49 @@ define void @vabd_loop_u32(i32* nocapture readonly %x, i32* nocapture readonly %
|
|||
; CHECK-NEXT: vmov.f32 s14, s9
|
||||
; CHECK-NEXT: vand q4, q3, q0
|
||||
; CHECK-NEXT: vldrw.u32 q3, [r0], #16
|
||||
; CHECK-NEXT: vmov r3, s16
|
||||
; CHECK-NEXT: vmov r3, r4, d8
|
||||
; CHECK-NEXT: vmov.f32 s20, s12
|
||||
; CHECK-NEXT: vmov.f32 s22, s13
|
||||
; CHECK-NEXT: vand q5, q5, q0
|
||||
; CHECK-NEXT: vmov r4, s17
|
||||
; CHECK-NEXT: vmov r6, s20
|
||||
; CHECK-NEXT: vmov r5, s21
|
||||
; CHECK-NEXT: vmov r7, s23
|
||||
; CHECK-NEXT: subs.w r8, r6, r3
|
||||
; CHECK-NEXT: vmov r3, s22
|
||||
; CHECK-NEXT: sbc.w r4, r5, r4
|
||||
; CHECK-NEXT: vmov r6, s19
|
||||
; CHECK-NEXT: vmov r5, r6, d10
|
||||
; CHECK-NEXT: subs.w r8, r5, r3
|
||||
; CHECK-NEXT: vmov r7, r3, d11
|
||||
; CHECK-NEXT: sbc.w r4, r6, r4
|
||||
; CHECK-NEXT: asrs r5, r4, #31
|
||||
; CHECK-NEXT: movs r4, #0
|
||||
; CHECK-NEXT: bfi r4, r5, #0, #4
|
||||
; CHECK-NEXT: vmov r5, s18
|
||||
; CHECK-NEXT: vmov r5, r6, d9
|
||||
; CHECK-NEXT: vmov.f32 s16, s10
|
||||
; CHECK-NEXT: vmov.f32 s18, s11
|
||||
; CHECK-NEXT: vand q2, q4, q0
|
||||
; CHECK-NEXT: vmov.f32 s16, s14
|
||||
; CHECK-NEXT: vmov.f32 s18, s15
|
||||
; CHECK-NEXT: vand q3, q4, q0
|
||||
; CHECK-NEXT: vmov r12, s12
|
||||
; CHECK-NEXT: subs.w r9, r3, r5
|
||||
; CHECK-NEXT: vmov r5, s14
|
||||
; CHECK-NEXT: sbc.w r3, r7, r6
|
||||
; CHECK-NEXT: movs r7, #1
|
||||
; CHECK-NEXT: vmov r6, s15
|
||||
; CHECK-NEXT: subs.w r9, r7, r5
|
||||
; CHECK-NEXT: mov.w r7, #1
|
||||
; CHECK-NEXT: sbcs r3, r6
|
||||
; CHECK-NEXT: and.w r3, r7, r3, asr #31
|
||||
; CHECK-NEXT: vmov r7, s10
|
||||
; CHECK-NEXT: vmov r7, r5, d7
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r4, r3, #4, #4
|
||||
; CHECK-NEXT: vmov r3, s11
|
||||
; CHECK-NEXT: subs.w r10, r5, r7
|
||||
; CHECK-NEXT: vmov r7, s9
|
||||
; CHECK-NEXT: vmov r5, s13
|
||||
; CHECK-NEXT: sbc.w r3, r6, r3
|
||||
; CHECK-NEXT: vmov r6, s8
|
||||
; CHECK-NEXT: asr.w r11, r3, #31
|
||||
; CHECK-NEXT: subs.w r6, r12, r6
|
||||
; CHECK-NEXT: sbc.w r7, r5, r7
|
||||
; CHECK-NEXT: asrs r7, r7, #31
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r7, r11
|
||||
; CHECK-NEXT: vmov r7, s8
|
||||
; CHECK-NEXT: vmov r3, r6, d5
|
||||
; CHECK-NEXT: subs.w r10, r7, r3
|
||||
; CHECK-NEXT: vmov r7, r3, d4
|
||||
; CHECK-NEXT: sbcs r5, r6
|
||||
; CHECK-NEXT: vmov r6, r12, d6
|
||||
; CHECK-NEXT: asr.w r11, r5, #31
|
||||
; CHECK-NEXT: subs r6, r6, r7
|
||||
; CHECK-NEXT: sbc.w r3, r12, r3
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r3, r11
|
||||
; CHECK-NEXT: vmov r3, s8
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r8, r6
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r9, r10
|
||||
; CHECK-NEXT: and r7, r7, #1
|
||||
; CHECK-NEXT: rsbs r7, r7, #0
|
||||
; CHECK-NEXT: bfi r4, r7, #8, #4
|
||||
; CHECK-NEXT: movs r7, #1
|
||||
; CHECK-NEXT: and.w r3, r7, r3, asr #31
|
||||
; CHECK-NEXT: and r3, r3, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r4, r3, #8, #4
|
||||
; CHECK-NEXT: movs r3, #1
|
||||
; CHECK-NEXT: and.w r3, r3, r5, asr #31
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r4, r3, #12, #4
|
||||
; CHECK-NEXT: vmsr p0, r4
|
||||
|
|
|
@ -12,12 +12,10 @@ declare i8 @llvm.vector.reduce.add.i8.v32i8(<32 x i8>)
|
|||
define arm_aapcs_vfpcc i64 @vaddv_v2i64_i64(<2 x i64> %s1) {
|
||||
; CHECK-LABEL: vaddv_v2i64_i64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: adds r0, r0, r3
|
||||
; CHECK-NEXT: adcs r1, r2
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: vmov r2, r3, d0
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adcs r1, r3
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%r = call i64 @llvm.vector.reduce.add.i64.v2i64(<2 x i64> %s1)
|
||||
|
@ -92,14 +90,12 @@ define arm_aapcs_vfpcc i64 @vaddva_v2i64_i64(<2 x i64> %s1, i64 %x) {
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r12, s3
|
||||
; CHECK-NEXT: vmov lr, s1
|
||||
; CHECK-NEXT: adds r2, r2, r3
|
||||
; CHECK-NEXT: adc.w r3, lr, r12
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adcs r1, r3
|
||||
; CHECK-NEXT: vmov lr, r12, d1
|
||||
; CHECK-NEXT: vmov r3, r2, d0
|
||||
; CHECK-NEXT: adds.w r3, r3, lr
|
||||
; CHECK-NEXT: adc.w r2, r2, r12
|
||||
; CHECK-NEXT: adds r0, r0, r3
|
||||
; CHECK-NEXT: adcs r1, r2
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
entry:
|
||||
%t = call i64 @llvm.vector.reduce.add.i64.v2i64(<2 x i64> %s1)
|
||||
|
|
|
@ -367,22 +367,18 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @vcmp_eq_v2i64(<2 x i64> %src, <2 x i64> %srcb, <2 x i64> %a, <2 x i64> %b) {
|
||||
; CHECK-LABEL: vcmp_eq_v2i64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: eors r0, r1
|
||||
; CHECK-NEXT: vmov r1, s6
|
||||
; CHECK-NEXT: eors r1, r2
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: vmov r0, r1, d3
|
||||
; CHECK-NEXT: vmov r2, r3, d1
|
||||
; CHECK-NEXT: eors r0, r2
|
||||
; CHECK-NEXT: eors r1, r3
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: vmov r12, r2, d2
|
||||
; CHECK-NEXT: vmov r3, r1, d0
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: eors r1, r2
|
||||
; CHECK-NEXT: vmov r2, s4
|
||||
; CHECK-NEXT: eors r2, r3
|
||||
; CHECK-NEXT: eor.w r2, r3, r12
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: cset r1, eq
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
|
@ -402,22 +398,18 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i32> @vcmp_eq_v2i32(<2 x i64> %src, <2 x i64> %srcb, <2 x i32> %a, <2 x i32> %b) {
|
||||
; CHECK-LABEL: vcmp_eq_v2i32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: eors r0, r1
|
||||
; CHECK-NEXT: vmov r1, s6
|
||||
; CHECK-NEXT: eors r1, r2
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: vmov r0, r1, d3
|
||||
; CHECK-NEXT: vmov r2, r3, d1
|
||||
; CHECK-NEXT: eors r0, r2
|
||||
; CHECK-NEXT: eors r1, r3
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: vmov r12, r2, d2
|
||||
; CHECK-NEXT: vmov r3, r1, d0
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: eors r1, r2
|
||||
; CHECK-NEXT: vmov r2, s4
|
||||
; CHECK-NEXT: eors r2, r3
|
||||
; CHECK-NEXT: eor.w r2, r3, r12
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: cset r1, eq
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
|
@ -441,12 +433,10 @@ define arm_aapcs_vfpcc <2 x i32> @vcmp_multi_v2i32(<2 x i64> %a, <2 x i32> %b, <
|
|||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: movs r3, #0
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r1, r2, d0
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
|
|
|
@ -433,18 +433,16 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @vcmp_eq_v2i64(<2 x i64> %src, i64 %src2, <2 x i64> %a, <2 x i64> %b) {
|
||||
; CHECK-LABEL: vcmp_eq_v2i64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: eors r2, r1
|
||||
; CHECK-NEXT: eors r3, r0
|
||||
; CHECK-NEXT: vmov r2, r3, d1
|
||||
; CHECK-NEXT: eors r3, r1
|
||||
; CHECK-NEXT: eors r2, r0
|
||||
; CHECK-NEXT: orrs r2, r3
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: vmov r12, r3, d0
|
||||
; CHECK-NEXT: cset r2, eq
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: csetm r2, ne
|
||||
; CHECK-NEXT: eors r1, r3
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: eors r0, r3
|
||||
; CHECK-NEXT: eor.w r0, r0, r12
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
|
@ -466,18 +464,16 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i32> @vcmp_eq_v2i32(<2 x i64> %src, i64 %src2, <2 x i32> %a, <2 x i32> %b) {
|
||||
; CHECK-LABEL: vcmp_eq_v2i32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: eors r2, r1
|
||||
; CHECK-NEXT: eors r3, r0
|
||||
; CHECK-NEXT: vmov r2, r3, d1
|
||||
; CHECK-NEXT: eors r3, r1
|
||||
; CHECK-NEXT: eors r2, r0
|
||||
; CHECK-NEXT: orrs r2, r3
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: vmov r12, r3, d0
|
||||
; CHECK-NEXT: cset r2, eq
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: csetm r2, ne
|
||||
; CHECK-NEXT: eors r1, r3
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: eors r0, r3
|
||||
; CHECK-NEXT: eor.w r0, r0, r12
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
|
@ -503,12 +499,10 @@ define arm_aapcs_vfpcc <2 x i32> @vcmp_multi_v2i32(<2 x i64> %a, <2 x i32> %b, <
|
|||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: movs r3, #0
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r1, r2, d0
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
|
@ -1014,18 +1008,16 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @vcmp_r_eq_v2i64(<2 x i64> %src, i64 %src2, <2 x i64> %a, <2 x i64> %b) {
|
||||
; CHECK-LABEL: vcmp_r_eq_v2i64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: eors r2, r1
|
||||
; CHECK-NEXT: eors r3, r0
|
||||
; CHECK-NEXT: vmov r2, r3, d1
|
||||
; CHECK-NEXT: eors r3, r1
|
||||
; CHECK-NEXT: eors r2, r0
|
||||
; CHECK-NEXT: orrs r2, r3
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: vmov r12, r3, d0
|
||||
; CHECK-NEXT: cset r2, eq
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: csetm r2, ne
|
||||
; CHECK-NEXT: eors r1, r3
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: eors r0, r3
|
||||
; CHECK-NEXT: eor.w r0, r0, r12
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
|
@ -1047,18 +1039,16 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i32> @vcmp_r_eq_v2i32(<2 x i64> %src, i64 %src2, <2 x i32> %a, <2 x i32> %b) {
|
||||
; CHECK-LABEL: vcmp_r_eq_v2i32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: eors r2, r1
|
||||
; CHECK-NEXT: eors r3, r0
|
||||
; CHECK-NEXT: vmov r2, r3, d1
|
||||
; CHECK-NEXT: eors r3, r1
|
||||
; CHECK-NEXT: eors r2, r0
|
||||
; CHECK-NEXT: orrs r2, r3
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: vmov r12, r3, d0
|
||||
; CHECK-NEXT: cset r2, eq
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: csetm r2, ne
|
||||
; CHECK-NEXT: eors r1, r3
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: eors r0, r3
|
||||
; CHECK-NEXT: eor.w r0, r0, r12
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
|
@ -1084,12 +1074,10 @@ define arm_aapcs_vfpcc <2 x i32> @vcmp_r_multi_v2i32(<2 x i64> %a, <2 x i32> %b,
|
|||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: movs r3, #0
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r1, r2, d0
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
|
|
|
@ -361,11 +361,9 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @vcmp_eqz_v2i64(<2 x i64> %src, <2 x i64> %a, <2 x i64> %b) {
|
||||
; CHECK-LABEL: vcmp_eqz_v2i64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r1, r2, d0
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
|
@ -388,11 +386,9 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i32> @vcmp_eqz_v2i32(<2 x i64> %src, <2 x i32> %a, <2 x i32> %b) {
|
||||
; CHECK-LABEL: vcmp_eqz_v2i32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r1, r2, d0
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
|
@ -775,11 +771,9 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @vcmp_r_eqz_v2i64(<2 x i64> %src, <2 x i64> %a, <2 x i64> %b) {
|
||||
; CHECK-LABEL: vcmp_r_eqz_v2i64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r1, r2, d0
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
|
@ -802,11 +796,9 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i32> @vcmp_r_eqz_v2i32(<2 x i64> %src, <2 x i32> %a, <2 x i32> %b) {
|
||||
; CHECK-LABEL: vcmp_r_eqz_v2i32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r1, r2, d0
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
|
|
|
@ -266,11 +266,9 @@ define arm_aapcs_vfpcc <2 x double> @foo_float_int64(<2 x i64> %src) {
|
|||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vmov q4, q0
|
||||
; CHECK-NEXT: vmov r0, s18
|
||||
; CHECK-NEXT: vmov r1, s19
|
||||
; CHECK-NEXT: vmov r0, r1, d9
|
||||
; CHECK-NEXT: bl __aeabi_l2d
|
||||
; CHECK-NEXT: vmov r2, s16
|
||||
; CHECK-NEXT: vmov r3, s17
|
||||
; CHECK-NEXT: vmov r2, r3, d8
|
||||
; CHECK-NEXT: vmov d9, r0, r1
|
||||
; CHECK-NEXT: mov r0, r2
|
||||
; CHECK-NEXT: mov r1, r3
|
||||
|
@ -292,11 +290,9 @@ define arm_aapcs_vfpcc <2 x double> @foo_float_uint64(<2 x i64> %src) {
|
|||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vmov q4, q0
|
||||
; CHECK-NEXT: vmov r0, s18
|
||||
; CHECK-NEXT: vmov r1, s19
|
||||
; CHECK-NEXT: vmov r0, r1, d9
|
||||
; CHECK-NEXT: bl __aeabi_ul2d
|
||||
; CHECK-NEXT: vmov r2, s16
|
||||
; CHECK-NEXT: vmov r3, s17
|
||||
; CHECK-NEXT: vmov r2, r3, d8
|
||||
; CHECK-NEXT: vmov d9, r0, r1
|
||||
; CHECK-NEXT: mov r0, r2
|
||||
; CHECK-NEXT: mov r1, r3
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -16,11 +16,9 @@ entry:
|
|||
define arm_aapcs_vfpcc i32 @and_v4i32(<4 x i32> %x) {
|
||||
; CHECK-LABEL: and_v4i32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: ands r0, r1
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r1, r2, d0
|
||||
; CHECK-NEXT: ands r1, r2
|
||||
; CHECK-NEXT: ands r0, r1
|
||||
; CHECK-NEXT: bx lr
|
||||
|
@ -33,11 +31,9 @@ define arm_aapcs_vfpcc i32 @and_v8i32(<8 x i32> %x) {
|
|||
; CHECK-LABEL: and_v8i32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vand q0, q0, q1
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: ands r0, r1
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r1, r2, d0
|
||||
; CHECK-NEXT: ands r1, r2
|
||||
; CHECK-NEXT: ands r0, r1
|
||||
; CHECK-NEXT: bx lr
|
||||
|
@ -49,11 +45,9 @@ entry:
|
|||
define arm_aapcs_vfpcc i16 @and_v4i16(<4 x i16> %x) {
|
||||
; CHECK-LABEL: and_v4i16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: ands r0, r1
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r1, r2, d0
|
||||
; CHECK-NEXT: ands r1, r2
|
||||
; CHECK-NEXT: ands r0, r1
|
||||
; CHECK-NEXT: bx lr
|
||||
|
@ -170,12 +164,10 @@ entry:
|
|||
define arm_aapcs_vfpcc i64 @and_v2i64(<2 x i64> %x) {
|
||||
; CHECK-LABEL: and_v2i64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: ands r0, r1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: ands r1, r2
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: vmov r2, r3, d0
|
||||
; CHECK-NEXT: ands r0, r2
|
||||
; CHECK-NEXT: ands r1, r3
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%z = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %x)
|
||||
|
@ -186,12 +178,10 @@ define arm_aapcs_vfpcc i64 @and_v4i64(<4 x i64> %x) {
|
|||
; CHECK-LABEL: and_v4i64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vand q0, q0, q1
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: ands r0, r1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: ands r1, r2
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: vmov r2, r3, d0
|
||||
; CHECK-NEXT: ands r0, r2
|
||||
; CHECK-NEXT: ands r1, r3
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%z = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %x)
|
||||
|
@ -215,11 +205,9 @@ entry:
|
|||
define arm_aapcs_vfpcc i32 @and_v4i32_acc(<4 x i32> %x, i32 %y) {
|
||||
; CHECK-LABEL: and_v4i32_acc:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r1, r2, d1
|
||||
; CHECK-NEXT: ands r1, r2
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: vmov r2, r3, d0
|
||||
; CHECK-NEXT: ands r2, r3
|
||||
; CHECK-NEXT: ands r1, r2
|
||||
; CHECK-NEXT: ands r0, r1
|
||||
|
@ -234,11 +222,9 @@ define arm_aapcs_vfpcc i32 @and_v8i32_acc(<8 x i32> %x, i32 %y) {
|
|||
; CHECK-LABEL: and_v8i32_acc:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vand q0, q0, q1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r1, r2, d1
|
||||
; CHECK-NEXT: ands r1, r2
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: vmov r2, r3, d0
|
||||
; CHECK-NEXT: ands r2, r3
|
||||
; CHECK-NEXT: ands r1, r2
|
||||
; CHECK-NEXT: ands r0, r1
|
||||
|
@ -252,11 +238,9 @@ entry:
|
|||
define arm_aapcs_vfpcc i16 @and_v4i16_acc(<4 x i16> %x, i16 %y) {
|
||||
; CHECK-LABEL: and_v4i16_acc:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r1, r2, d1
|
||||
; CHECK-NEXT: ands r1, r2
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: vmov r2, r3, d0
|
||||
; CHECK-NEXT: ands r2, r3
|
||||
; CHECK-NEXT: ands r1, r2
|
||||
; CHECK-NEXT: ands r0, r1
|
||||
|
@ -388,15 +372,15 @@ entry:
|
|||
define arm_aapcs_vfpcc i64 @and_v2i64_acc(<2 x i64> %x, i64 %y) {
|
||||
; CHECK-LABEL: and_v2i64_acc:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: vmov r2, r12, d1
|
||||
; CHECK-NEXT: vmov r3, lr, d0
|
||||
; CHECK-NEXT: ands r2, r3
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: ands r0, r2
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: ands r2, r3
|
||||
; CHECK-NEXT: and.w r2, lr, r12
|
||||
; CHECK-NEXT: ands r1, r2
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
entry:
|
||||
%z = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %x)
|
||||
%r = and i64 %y, %z
|
||||
|
@ -406,16 +390,16 @@ entry:
|
|||
define arm_aapcs_vfpcc i64 @and_v4i64_acc(<4 x i64> %x, i64 %y) {
|
||||
; CHECK-LABEL: and_v4i64_acc:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: vand q0, q0, q1
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r2, r12, d1
|
||||
; CHECK-NEXT: vmov r3, lr, d0
|
||||
; CHECK-NEXT: ands r2, r3
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: ands r0, r2
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: ands r2, r3
|
||||
; CHECK-NEXT: and.w r2, lr, r12
|
||||
; CHECK-NEXT: ands r1, r2
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
entry:
|
||||
%z = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %x)
|
||||
%r = and i64 %y, %z
|
||||
|
@ -437,11 +421,9 @@ entry:
|
|||
define arm_aapcs_vfpcc i32 @or_v4i32(<4 x i32> %x) {
|
||||
; CHECK-LABEL: or_v4i32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r1, r2, d0
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: bx lr
|
||||
|
@ -454,11 +436,9 @@ define arm_aapcs_vfpcc i32 @or_v8i32(<8 x i32> %x) {
|
|||
; CHECK-LABEL: or_v8i32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vorr q0, q0, q1
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r1, r2, d0
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: bx lr
|
||||
|
@ -470,11 +450,9 @@ entry:
|
|||
define arm_aapcs_vfpcc i16 @or_v4i16(<4 x i16> %x) {
|
||||
; CHECK-LABEL: or_v4i16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r1, r2, d0
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: bx lr
|
||||
|
@ -591,12 +569,10 @@ entry:
|
|||
define arm_aapcs_vfpcc i64 @or_v2i64(<2 x i64> %x) {
|
||||
; CHECK-LABEL: or_v2i64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: vmov r2, r3, d0
|
||||
; CHECK-NEXT: orrs r0, r2
|
||||
; CHECK-NEXT: orrs r1, r3
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%z = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %x)
|
||||
|
@ -607,12 +583,10 @@ define arm_aapcs_vfpcc i64 @or_v4i64(<4 x i64> %x) {
|
|||
; CHECK-LABEL: or_v4i64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vorr q0, q0, q1
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: vmov r2, r3, d0
|
||||
; CHECK-NEXT: orrs r0, r2
|
||||
; CHECK-NEXT: orrs r1, r3
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%z = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %x)
|
||||
|
@ -636,11 +610,9 @@ entry:
|
|||
define arm_aapcs_vfpcc i32 @or_v4i32_acc(<4 x i32> %x, i32 %y) {
|
||||
; CHECK-LABEL: or_v4i32_acc:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r1, r2, d1
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: vmov r2, r3, d0
|
||||
; CHECK-NEXT: orrs r2, r3
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
|
@ -655,11 +627,9 @@ define arm_aapcs_vfpcc i32 @or_v8i32_acc(<8 x i32> %x, i32 %y) {
|
|||
; CHECK-LABEL: or_v8i32_acc:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vorr q0, q0, q1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r1, r2, d1
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: vmov r2, r3, d0
|
||||
; CHECK-NEXT: orrs r2, r3
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
|
@ -673,11 +643,9 @@ entry:
|
|||
define arm_aapcs_vfpcc i16 @or_v4i16_acc(<4 x i16> %x, i16 %y) {
|
||||
; CHECK-LABEL: or_v4i16_acc:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r1, r2, d1
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: vmov r2, r3, d0
|
||||
; CHECK-NEXT: orrs r2, r3
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
|
@ -809,15 +777,15 @@ entry:
|
|||
define arm_aapcs_vfpcc i64 @or_v2i64_acc(<2 x i64> %x, i64 %y) {
|
||||
; CHECK-LABEL: or_v2i64_acc:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: vmov r2, r12, d1
|
||||
; CHECK-NEXT: vmov r3, lr, d0
|
||||
; CHECK-NEXT: orrs r2, r3
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: orrs r0, r2
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: orrs r2, r3
|
||||
; CHECK-NEXT: orr.w r2, lr, r12
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
entry:
|
||||
%z = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %x)
|
||||
%r = or i64 %y, %z
|
||||
|
@ -827,16 +795,16 @@ entry:
|
|||
define arm_aapcs_vfpcc i64 @or_v4i64_acc(<4 x i64> %x, i64 %y) {
|
||||
; CHECK-LABEL: or_v4i64_acc:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: vorr q0, q0, q1
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r2, r12, d1
|
||||
; CHECK-NEXT: vmov r3, lr, d0
|
||||
; CHECK-NEXT: orrs r2, r3
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: orrs r0, r2
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: orrs r2, r3
|
||||
; CHECK-NEXT: orr.w r2, lr, r12
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
entry:
|
||||
%z = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %x)
|
||||
%r = or i64 %y, %z
|
||||
|
@ -858,11 +826,9 @@ entry:
|
|||
define arm_aapcs_vfpcc i32 @xor_v4i32(<4 x i32> %x) {
|
||||
; CHECK-LABEL: xor_v4i32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: eors r0, r1
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r1, r2, d0
|
||||
; CHECK-NEXT: eors r1, r2
|
||||
; CHECK-NEXT: eors r0, r1
|
||||
; CHECK-NEXT: bx lr
|
||||
|
@ -875,11 +841,9 @@ define arm_aapcs_vfpcc i32 @xor_v8i32(<8 x i32> %x) {
|
|||
; CHECK-LABEL: xor_v8i32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: veor q0, q0, q1
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: eors r0, r1
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r1, r2, d0
|
||||
; CHECK-NEXT: eors r1, r2
|
||||
; CHECK-NEXT: eors r0, r1
|
||||
; CHECK-NEXT: bx lr
|
||||
|
@ -891,11 +855,9 @@ entry:
|
|||
define arm_aapcs_vfpcc i16 @xor_v4i16(<4 x i16> %x) {
|
||||
; CHECK-LABEL: xor_v4i16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: eors r0, r1
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r1, r2, d0
|
||||
; CHECK-NEXT: eors r1, r2
|
||||
; CHECK-NEXT: eors r0, r1
|
||||
; CHECK-NEXT: bx lr
|
||||
|
@ -1012,12 +974,10 @@ entry:
|
|||
define arm_aapcs_vfpcc i64 @xor_v2i64(<2 x i64> %x) {
|
||||
; CHECK-LABEL: xor_v2i64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: eors r0, r1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: eors r1, r2
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: vmov r2, r3, d0
|
||||
; CHECK-NEXT: eors r0, r2
|
||||
; CHECK-NEXT: eors r1, r3
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%z = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> %x)
|
||||
|
@ -1028,12 +988,10 @@ define arm_aapcs_vfpcc i64 @xor_v4i64(<4 x i64> %x) {
|
|||
; CHECK-LABEL: xor_v4i64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: veor q0, q0, q1
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: eors r0, r1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: eors r1, r2
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: vmov r2, r3, d0
|
||||
; CHECK-NEXT: eors r0, r2
|
||||
; CHECK-NEXT: eors r1, r3
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%z = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %x)
|
||||
|
@ -1057,11 +1015,9 @@ entry:
|
|||
define arm_aapcs_vfpcc i32 @xor_v4i32_acc(<4 x i32> %x, i32 %y) {
|
||||
; CHECK-LABEL: xor_v4i32_acc:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r1, r2, d1
|
||||
; CHECK-NEXT: eors r1, r2
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: vmov r2, r3, d0
|
||||
; CHECK-NEXT: eors r2, r3
|
||||
; CHECK-NEXT: eors r1, r2
|
||||
; CHECK-NEXT: eors r0, r1
|
||||
|
@ -1076,11 +1032,9 @@ define arm_aapcs_vfpcc i32 @xor_v8i32_acc(<8 x i32> %x, i32 %y) {
|
|||
; CHECK-LABEL: xor_v8i32_acc:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: veor q0, q0, q1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r1, r2, d1
|
||||
; CHECK-NEXT: eors r1, r2
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: vmov r2, r3, d0
|
||||
; CHECK-NEXT: eors r2, r3
|
||||
; CHECK-NEXT: eors r1, r2
|
||||
; CHECK-NEXT: eors r0, r1
|
||||
|
@ -1094,11 +1048,9 @@ entry:
|
|||
define arm_aapcs_vfpcc i16 @xor_v4i16_acc(<4 x i16> %x, i16 %y) {
|
||||
; CHECK-LABEL: xor_v4i16_acc:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r1, r2, d1
|
||||
; CHECK-NEXT: eors r1, r2
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: vmov r2, r3, d0
|
||||
; CHECK-NEXT: eors r2, r3
|
||||
; CHECK-NEXT: eors r1, r2
|
||||
; CHECK-NEXT: eors r0, r1
|
||||
|
@ -1230,15 +1182,15 @@ entry:
|
|||
define arm_aapcs_vfpcc i64 @xor_v2i64_acc(<2 x i64> %x, i64 %y) {
|
||||
; CHECK-LABEL: xor_v2i64_acc:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: vmov r2, r12, d1
|
||||
; CHECK-NEXT: vmov r3, lr, d0
|
||||
; CHECK-NEXT: eors r2, r3
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: eors r0, r2
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: eors r2, r3
|
||||
; CHECK-NEXT: eor.w r2, lr, r12
|
||||
; CHECK-NEXT: eors r1, r2
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
entry:
|
||||
%z = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> %x)
|
||||
%r = xor i64 %y, %z
|
||||
|
@ -1248,16 +1200,16 @@ entry:
|
|||
define arm_aapcs_vfpcc i64 @xor_v4i64_acc(<4 x i64> %x, i64 %y) {
|
||||
; CHECK-LABEL: xor_v4i64_acc:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: veor q0, q0, q1
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r2, r12, d1
|
||||
; CHECK-NEXT: vmov r3, lr, d0
|
||||
; CHECK-NEXT: eors r2, r3
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: eors r0, r2
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: eors r2, r3
|
||||
; CHECK-NEXT: eor.w r2, lr, r12
|
||||
; CHECK-NEXT: eors r1, r2
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
entry:
|
||||
%z = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %x)
|
||||
%r = xor i64 %y, %z
|
||||
|
|
|
@ -96,8 +96,8 @@ for.cond.cleanup: ; preds = %for.body, %middle.b
|
|||
define i32 @mul_i32(i32* nocapture readonly %x, i32 %n) {
|
||||
; CHECK-LABEL: mul_i32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: .save {r4, lr}
|
||||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: movs r2, #1
|
||||
; CHECK-NEXT: cmp r1, #1
|
||||
; CHECK-NEXT: blt .LBB1_8
|
||||
|
@ -119,14 +119,12 @@ define i32 @mul_i32(i32* nocapture readonly %x, i32 %n) {
|
|||
; CHECK-NEXT: vmul.i32 q0, q1, q0
|
||||
; CHECK-NEXT: le lr, .LBB1_4
|
||||
; CHECK-NEXT: @ %bb.5: @ %middle.block
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: vmov lr, r3, d1
|
||||
; CHECK-NEXT: cmp r12, r1
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: mul lr, r3, r2
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r2, r4, d0
|
||||
; CHECK-NEXT: mul r3, lr, r3
|
||||
; CHECK-NEXT: mul r2, r4, r2
|
||||
; CHECK-NEXT: mul r2, r3, r2
|
||||
; CHECK-NEXT: mul r2, r2, lr
|
||||
; CHECK-NEXT: beq .LBB1_8
|
||||
; CHECK-NEXT: .LBB1_6: @ %for.body.preheader1
|
||||
; CHECK-NEXT: sub.w lr, r1, r12
|
||||
|
@ -138,7 +136,7 @@ define i32 @mul_i32(i32* nocapture readonly %x, i32 %n) {
|
|||
; CHECK-NEXT: le lr, .LBB1_7
|
||||
; CHECK-NEXT: .LBB1_8: @ %for.cond.cleanup
|
||||
; CHECK-NEXT: mov r0, r2
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
; CHECK-NEXT: pop {r4, pc}
|
||||
entry:
|
||||
%cmp6 = icmp sgt i32 %n, 0
|
||||
br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup
|
||||
|
@ -190,8 +188,8 @@ for.cond.cleanup: ; preds = %for.body, %middle.b
|
|||
define i32 @and_i32(i32* nocapture readonly %x, i32 %n) {
|
||||
; CHECK-LABEL: and_i32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: .save {r4, lr}
|
||||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: cmp r1, #1
|
||||
; CHECK-NEXT: blt .LBB2_3
|
||||
; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
|
||||
|
@ -217,13 +215,11 @@ define i32 @and_i32(i32* nocapture readonly %x, i32 %n) {
|
|||
; CHECK-NEXT: vand q0, q1, q0
|
||||
; CHECK-NEXT: le lr, .LBB2_5
|
||||
; CHECK-NEXT: @ %bb.6: @ %middle.block
|
||||
; CHECK-NEXT: vmov r12, s3
|
||||
; CHECK-NEXT: vmov lr, r12, d1
|
||||
; CHECK-NEXT: cmp r3, r1
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov lr, s1
|
||||
; CHECK-NEXT: and.w r12, r12, r2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: and.w r2, r2, lr
|
||||
; CHECK-NEXT: vmov r2, r4, d0
|
||||
; CHECK-NEXT: and.w r12, r12, lr
|
||||
; CHECK-NEXT: and.w r2, r2, r4
|
||||
; CHECK-NEXT: and.w r2, r2, r12
|
||||
; CHECK-NEXT: beq .LBB2_9
|
||||
; CHECK-NEXT: .LBB2_7: @ %for.body.preheader1
|
||||
|
@ -236,7 +232,7 @@ define i32 @and_i32(i32* nocapture readonly %x, i32 %n) {
|
|||
; CHECK-NEXT: le lr, .LBB2_8
|
||||
; CHECK-NEXT: .LBB2_9: @ %for.cond.cleanup
|
||||
; CHECK-NEXT: mov r0, r2
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
; CHECK-NEXT: pop {r4, pc}
|
||||
entry:
|
||||
%cmp6 = icmp sgt i32 %n, 0
|
||||
br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup
|
||||
|
@ -288,8 +284,8 @@ for.cond.cleanup: ; preds = %for.body, %middle.b
|
|||
define i32 @or_i32(i32* nocapture readonly %x, i32 %n) {
|
||||
; CHECK-LABEL: or_i32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: .save {r4, lr}
|
||||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: cmp r1, #1
|
||||
; CHECK-NEXT: blt .LBB3_3
|
||||
; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
|
||||
|
@ -315,13 +311,11 @@ define i32 @or_i32(i32* nocapture readonly %x, i32 %n) {
|
|||
; CHECK-NEXT: vorr q0, q1, q0
|
||||
; CHECK-NEXT: le lr, .LBB3_5
|
||||
; CHECK-NEXT: @ %bb.6: @ %middle.block
|
||||
; CHECK-NEXT: vmov r12, s3
|
||||
; CHECK-NEXT: vmov lr, r12, d1
|
||||
; CHECK-NEXT: cmp r3, r1
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov lr, s1
|
||||
; CHECK-NEXT: orr.w r12, r12, r2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: orr.w r2, r2, lr
|
||||
; CHECK-NEXT: vmov r2, r4, d0
|
||||
; CHECK-NEXT: orr.w r12, r12, lr
|
||||
; CHECK-NEXT: orr.w r2, r2, r4
|
||||
; CHECK-NEXT: orr.w r2, r2, r12
|
||||
; CHECK-NEXT: beq .LBB3_9
|
||||
; CHECK-NEXT: .LBB3_7: @ %for.body.preheader1
|
||||
|
@ -334,7 +328,7 @@ define i32 @or_i32(i32* nocapture readonly %x, i32 %n) {
|
|||
; CHECK-NEXT: le lr, .LBB3_8
|
||||
; CHECK-NEXT: .LBB3_9: @ %for.cond.cleanup
|
||||
; CHECK-NEXT: mov r0, r2
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
; CHECK-NEXT: pop {r4, pc}
|
||||
entry:
|
||||
%cmp6 = icmp sgt i32 %n, 0
|
||||
br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup
|
||||
|
@ -386,8 +380,8 @@ for.cond.cleanup: ; preds = %for.body, %middle.b
|
|||
define i32 @xor_i32(i32* nocapture readonly %x, i32 %n) {
|
||||
; CHECK-LABEL: xor_i32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: .save {r4, lr}
|
||||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: cmp r1, #1
|
||||
; CHECK-NEXT: blt .LBB4_3
|
||||
; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
|
||||
|
@ -413,13 +407,11 @@ define i32 @xor_i32(i32* nocapture readonly %x, i32 %n) {
|
|||
; CHECK-NEXT: veor q0, q1, q0
|
||||
; CHECK-NEXT: le lr, .LBB4_5
|
||||
; CHECK-NEXT: @ %bb.6: @ %middle.block
|
||||
; CHECK-NEXT: vmov r12, s3
|
||||
; CHECK-NEXT: vmov lr, r12, d1
|
||||
; CHECK-NEXT: cmp r3, r1
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov lr, s1
|
||||
; CHECK-NEXT: eor.w r12, r12, r2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: eor.w r2, r2, lr
|
||||
; CHECK-NEXT: vmov r2, r4, d0
|
||||
; CHECK-NEXT: eor.w r12, r12, lr
|
||||
; CHECK-NEXT: eor.w r2, r2, r4
|
||||
; CHECK-NEXT: eor.w r2, r2, r12
|
||||
; CHECK-NEXT: beq .LBB4_9
|
||||
; CHECK-NEXT: .LBB4_7: @ %for.body.preheader1
|
||||
|
@ -432,7 +424,7 @@ define i32 @xor_i32(i32* nocapture readonly %x, i32 %n) {
|
|||
; CHECK-NEXT: le lr, .LBB4_8
|
||||
; CHECK-NEXT: .LBB4_9: @ %for.cond.cleanup
|
||||
; CHECK-NEXT: mov r0, r2
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
; CHECK-NEXT: pop {r4, pc}
|
||||
entry:
|
||||
%cmp6 = icmp sgt i32 %n, 0
|
||||
br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -16,11 +16,9 @@ entry:
|
|||
define arm_aapcs_vfpcc i32 @mul_v4i32(<4 x i32> %x) {
|
||||
; CHECK-LABEL: mul_v4i32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: muls r0, r1, r0
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r1, r2, d0
|
||||
; CHECK-NEXT: muls r1, r2, r1
|
||||
; CHECK-NEXT: muls r0, r1, r0
|
||||
; CHECK-NEXT: bx lr
|
||||
|
@ -33,11 +31,9 @@ define arm_aapcs_vfpcc i32 @mul_v8i32(<8 x i32> %x) {
|
|||
; CHECK-LABEL: mul_v8i32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmul.i32 q0, q0, q1
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: muls r0, r1, r0
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r1, r2, d0
|
||||
; CHECK-NEXT: muls r1, r2, r1
|
||||
; CHECK-NEXT: muls r0, r1, r0
|
||||
; CHECK-NEXT: bx lr
|
||||
|
@ -49,11 +45,9 @@ entry:
|
|||
define arm_aapcs_vfpcc i16 @mul_v4i16(<4 x i16> %x) {
|
||||
; CHECK-LABEL: mul_v4i16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: muls r0, r1, r0
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r1, r2, d0
|
||||
; CHECK-NEXT: muls r1, r2, r1
|
||||
; CHECK-NEXT: muls r0, r1, r0
|
||||
; CHECK-NEXT: bx lr
|
||||
|
@ -170,14 +164,14 @@ entry:
|
|||
define arm_aapcs_vfpcc i64 @mul_v2i64(<2 x i64> %x) {
|
||||
; CHECK-LABEL: mul_v2i64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: umull r0, r12, r2, r1
|
||||
; CHECK-NEXT: mla r2, r2, r3, r12
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: mla r1, r3, r1, r2
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: vmov r1, r12, d1
|
||||
; CHECK-NEXT: vmov r3, lr, d0
|
||||
; CHECK-NEXT: umull r0, r2, r3, r1
|
||||
; CHECK-NEXT: mla r2, r3, r12, r2
|
||||
; CHECK-NEXT: mla r1, lr, r1, r2
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
entry:
|
||||
%z = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> %x)
|
||||
ret i64 %z
|
||||
|
@ -186,26 +180,22 @@ entry:
|
|||
define arm_aapcs_vfpcc i64 @mul_v4i64(<4 x i64> %x) {
|
||||
; CHECK-LABEL: mul_v4i64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
|
||||
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
|
||||
; CHECK-NEXT: vmov lr, s2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r1, s4
|
||||
; CHECK-NEXT: vmov r6, s6
|
||||
; CHECK-NEXT: vmov r5, s7
|
||||
; CHECK-NEXT: umull r3, r12, r2, lr
|
||||
; CHECK-NEXT: umull r4, r8, r3, r1
|
||||
; CHECK-NEXT: umull r0, r7, r4, r6
|
||||
; CHECK-NEXT: mla r4, r4, r5, r7
|
||||
; CHECK-NEXT: vmov r5, s5
|
||||
; CHECK-NEXT: vmov r7, s1
|
||||
; CHECK-NEXT: mla r3, r3, r5, r8
|
||||
; CHECK-NEXT: vmov r5, s3
|
||||
; CHECK-NEXT: mla r2, r2, r5, r12
|
||||
; CHECK-NEXT: mla r2, r7, lr, r2
|
||||
; CHECK-NEXT: mla r1, r2, r1, r3
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
|
||||
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
|
||||
; CHECK-NEXT: vmov r1, r12, d1
|
||||
; CHECK-NEXT: vmov r3, lr, d0
|
||||
; CHECK-NEXT: vmov r5, r9, d2
|
||||
; CHECK-NEXT: vmov r6, r11, d3
|
||||
; CHECK-NEXT: umull r2, r8, r3, r1
|
||||
; CHECK-NEXT: mla r3, r3, r12, r8
|
||||
; CHECK-NEXT: umull r7, r10, r2, r5
|
||||
; CHECK-NEXT: mla r1, lr, r1, r3
|
||||
; CHECK-NEXT: mla r2, r2, r9, r10
|
||||
; CHECK-NEXT: umull r0, r4, r7, r6
|
||||
; CHECK-NEXT: mla r1, r1, r5, r2
|
||||
; CHECK-NEXT: mla r4, r7, r11, r4
|
||||
; CHECK-NEXT: mla r1, r1, r6, r4
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||
entry:
|
||||
%z = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> %x)
|
||||
ret i64 %z
|
||||
|
@ -228,11 +218,9 @@ entry:
|
|||
define arm_aapcs_vfpcc i32 @mul_v4i32_acc(<4 x i32> %x, i32 %y) {
|
||||
; CHECK-LABEL: mul_v4i32_acc:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r1, r2, d1
|
||||
; CHECK-NEXT: muls r1, r2, r1
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: vmov r2, r3, d0
|
||||
; CHECK-NEXT: muls r2, r3, r2
|
||||
; CHECK-NEXT: muls r1, r2, r1
|
||||
; CHECK-NEXT: muls r0, r1, r0
|
||||
|
@ -247,11 +235,9 @@ define arm_aapcs_vfpcc i32 @mul_v8i32_acc(<8 x i32> %x, i32 %y) {
|
|||
; CHECK-LABEL: mul_v8i32_acc:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmul.i32 q0, q0, q1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r1, r2, d1
|
||||
; CHECK-NEXT: muls r1, r2, r1
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: vmov r2, r3, d0
|
||||
; CHECK-NEXT: muls r2, r3, r2
|
||||
; CHECK-NEXT: muls r1, r2, r1
|
||||
; CHECK-NEXT: muls r0, r1, r0
|
||||
|
@ -265,11 +251,9 @@ entry:
|
|||
define arm_aapcs_vfpcc i16 @mul_v4i16_acc(<4 x i16> %x, i16 %y) {
|
||||
; CHECK-LABEL: mul_v4i16_acc:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r1, r2, d1
|
||||
; CHECK-NEXT: muls r1, r2, r1
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: vmov r2, r3, d0
|
||||
; CHECK-NEXT: muls r2, r3, r2
|
||||
; CHECK-NEXT: muls r1, r2, r1
|
||||
; CHECK-NEXT: muls r0, r1, r0
|
||||
|
@ -405,20 +389,18 @@ entry:
|
|||
define arm_aapcs_vfpcc i64 @mul_v2i64_acc(<2 x i64> %x, i64 %y) {
|
||||
; CHECK-LABEL: mul_v2i64_acc:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, lr}
|
||||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r4, s3
|
||||
; CHECK-NEXT: umull r12, lr, r3, r2
|
||||
; CHECK-NEXT: mla r3, r3, r4, lr
|
||||
; CHECK-NEXT: vmov r4, s1
|
||||
; CHECK-NEXT: mla r3, r4, r2, r3
|
||||
; CHECK-NEXT: umull r2, r4, r0, r12
|
||||
; CHECK-NEXT: mla r0, r0, r3, r4
|
||||
; CHECK-NEXT: mla r1, r1, r12, r0
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vmov r2, r12, d1
|
||||
; CHECK-NEXT: vmov r3, lr, d0
|
||||
; CHECK-NEXT: umull r4, r5, r3, r2
|
||||
; CHECK-NEXT: mla r3, r3, r12, r5
|
||||
; CHECK-NEXT: mla r3, lr, r2, r3
|
||||
; CHECK-NEXT: umull r2, r5, r0, r4
|
||||
; CHECK-NEXT: mla r0, r0, r3, r5
|
||||
; CHECK-NEXT: mla r1, r1, r4, r0
|
||||
; CHECK-NEXT: mov r0, r2
|
||||
; CHECK-NEXT: pop {r4, pc}
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
entry:
|
||||
%z = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> %x)
|
||||
%r = mul i64 %y, %z
|
||||
|
@ -428,30 +410,36 @@ entry:
|
|||
define arm_aapcs_vfpcc i64 @mul_v4i64_acc(<4 x i64> %x, i64 %y) {
|
||||
; CHECK-LABEL: mul_v4i64_acc:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
|
||||
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr}
|
||||
; CHECK-NEXT: vmov r12, s2
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r4, s4
|
||||
; CHECK-NEXT: vmov r7, s6
|
||||
; CHECK-NEXT: vmov r6, s7
|
||||
; CHECK-NEXT: umull r2, lr, r3, r12
|
||||
; CHECK-NEXT: umull r5, r8, r2, r4
|
||||
; CHECK-NEXT: umull r10, r9, r5, r7
|
||||
; CHECK-NEXT: mla r5, r5, r6, r9
|
||||
; CHECK-NEXT: vmov r6, s5
|
||||
; CHECK-NEXT: mla r2, r2, r6, r8
|
||||
; CHECK-NEXT: vmov r6, s3
|
||||
; CHECK-NEXT: mla r3, r3, r6, lr
|
||||
; CHECK-NEXT: vmov r6, s1
|
||||
; CHECK-NEXT: mla r3, r6, r12, r3
|
||||
; CHECK-NEXT: mla r2, r3, r4, r2
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
|
||||
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
|
||||
; CHECK-NEXT: .pad #12
|
||||
; CHECK-NEXT: sub sp, #12
|
||||
; CHECK-NEXT: mov lr, r0
|
||||
; CHECK-NEXT: vmov r2, r0, d1
|
||||
; CHECK-NEXT: vmov r6, r9, d2
|
||||
; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill
|
||||
; CHECK-NEXT: vmov r7, r11, d3
|
||||
; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill
|
||||
; CHECK-NEXT: vmov r3, r0, d0
|
||||
; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
|
||||
; CHECK-NEXT: str r0, [sp] @ 4-byte Spill
|
||||
; CHECK-NEXT: umull r4, r8, r3, r2
|
||||
; CHECK-NEXT: mla r3, r3, r1, r8
|
||||
; CHECK-NEXT: ldr r1, [sp] @ 4-byte Reload
|
||||
; CHECK-NEXT: umull r5, r10, r4, r6
|
||||
; CHECK-NEXT: mla r2, r1, r2, r3
|
||||
; CHECK-NEXT: mla r4, r4, r9, r10
|
||||
; CHECK-NEXT: umull r0, r12, r5, r7
|
||||
; CHECK-NEXT: mla r2, r2, r6, r4
|
||||
; CHECK-NEXT: mla r5, r5, r11, r12
|
||||
; CHECK-NEXT: mla r3, r2, r7, r5
|
||||
; CHECK-NEXT: umull r2, r7, r0, r10
|
||||
; CHECK-NEXT: mla r0, r0, r3, r7
|
||||
; CHECK-NEXT: mla r1, r1, r10, r0
|
||||
; CHECK-NEXT: umull r2, r7, lr, r0
|
||||
; CHECK-NEXT: mla r1, lr, r3, r7
|
||||
; CHECK-NEXT: ldr r3, [sp, #8] @ 4-byte Reload
|
||||
; CHECK-NEXT: mla r1, r3, r0, r1
|
||||
; CHECK-NEXT: mov r0, r2
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc}
|
||||
; CHECK-NEXT: add sp, #12
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||
entry:
|
||||
%z = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> %x)
|
||||
%r = mul i64 %y, %z
|
||||
|
|
|
@ -66,32 +66,28 @@ entry:
|
|||
define <4 x i64> *@vld2_v2i64(<4 x i64> *%src, <2 x i64> *%dst) {
|
||||
; CHECK-LABEL: vld2_v2i64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0, #16]
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0, #16]
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0], #32
|
||||
; CHECK-NEXT: vmov.f64 d2, d1
|
||||
; CHECK-NEXT: vmov.f32 s5, s3
|
||||
; CHECK-NEXT: vmov.f32 s6, s10
|
||||
; CHECK-NEXT: vmov.f32 s2, s8
|
||||
; CHECK-NEXT: vmov.f32 s7, s11
|
||||
; CHECK-NEXT: vmov.f32 s3, s9
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: vmov r4, s4
|
||||
; CHECK-NEXT: vmov r5, s0
|
||||
; CHECK-NEXT: vmov r12, s7
|
||||
; CHECK-NEXT: vmov lr, s3
|
||||
; CHECK-NEXT: adds r6, r3, r2
|
||||
; CHECK-NEXT: vmov r3, s5
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: adc.w r12, r12, lr
|
||||
; CHECK-NEXT: adds r5, r5, r4
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r5, r6
|
||||
; CHECK-NEXT: adcs r2, r3
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r2, r12
|
||||
; CHECK-NEXT: vmov.f64 d4, d1
|
||||
; CHECK-NEXT: vmov.f32 s9, s3
|
||||
; CHECK-NEXT: vmov.f32 s10, s6
|
||||
; CHECK-NEXT: vmov.f32 s2, s4
|
||||
; CHECK-NEXT: vmov.f32 s11, s7
|
||||
; CHECK-NEXT: vmov.f32 s3, s5
|
||||
; CHECK-NEXT: vmov r4, r7, d4
|
||||
; CHECK-NEXT: vmov r2, r5, d0
|
||||
; CHECK-NEXT: vmov lr, r12, d5
|
||||
; CHECK-NEXT: vmov r3, r6, d1
|
||||
; CHECK-NEXT: adds.w r3, r3, lr
|
||||
; CHECK-NEXT: adc.w r6, r6, r12
|
||||
; CHECK-NEXT: adds r2, r2, r4
|
||||
; CHECK-NEXT: adcs r7, r5
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, r3
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r7, r6
|
||||
; CHECK-NEXT: vstrw.32 q0, [r1]
|
||||
; CHECK-NEXT: pop {r4, r5, r6, pc}
|
||||
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
|
||||
entry:
|
||||
%l1 = load <4 x i64>, <4 x i64>* %src, align 4
|
||||
%s1 = shufflevector <4 x i64> %l1, <4 x i64> undef, <2 x i32> <i32 0, i32 2>
|
||||
|
|
|
@ -319,32 +319,28 @@ entry:
|
|||
define void @vld2_v2i64(<4 x i64> *%src, <2 x i64> *%dst) {
|
||||
; CHECK-LABEL: vld2_v2i64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, lr}
|
||||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: .save {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0, #16]
|
||||
; CHECK-NEXT: vmov.f64 d2, d1
|
||||
; CHECK-NEXT: vmov.f32 s5, s3
|
||||
; CHECK-NEXT: vmov.f32 s6, s10
|
||||
; CHECK-NEXT: vmov.f32 s2, s8
|
||||
; CHECK-NEXT: vmov.f32 s3, s9
|
||||
; CHECK-NEXT: vmov.f32 s7, s11
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: vmov r4, s0
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: vmov r12, s7
|
||||
; CHECK-NEXT: adds.w lr, r0, r3
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r3, s5
|
||||
; CHECK-NEXT: adc.w r12, r12, r2
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: adds r0, r0, r4
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, lr
|
||||
; CHECK-NEXT: adcs r2, r3
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r2, r12
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0, #16]
|
||||
; CHECK-NEXT: vmov.f64 d4, d1
|
||||
; CHECK-NEXT: vmov.f32 s9, s3
|
||||
; CHECK-NEXT: vmov.f32 s10, s6
|
||||
; CHECK-NEXT: vmov.f32 s2, s4
|
||||
; CHECK-NEXT: vmov.f32 s11, s7
|
||||
; CHECK-NEXT: vmov.f32 s3, s5
|
||||
; CHECK-NEXT: vmov r0, r4, d4
|
||||
; CHECK-NEXT: vmov r5, r6, d0
|
||||
; CHECK-NEXT: vmov lr, r12, d5
|
||||
; CHECK-NEXT: vmov r3, r2, d1
|
||||
; CHECK-NEXT: adds.w r3, r3, lr
|
||||
; CHECK-NEXT: adc.w r2, r2, r12
|
||||
; CHECK-NEXT: adds r0, r0, r5
|
||||
; CHECK-NEXT: adcs r6, r4
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, r3
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r6, r2
|
||||
; CHECK-NEXT: vstrw.32 q0, [r1]
|
||||
; CHECK-NEXT: pop {r4, pc}
|
||||
; CHECK-NEXT: pop {r4, r5, r6, pc}
|
||||
entry:
|
||||
%l1 = load <4 x i64>, <4 x i64>* %src, align 8
|
||||
%s1 = shufflevector <4 x i64> %l1, <4 x i64> undef, <2 x i32> <i32 0, i32 2>
|
||||
|
@ -357,58 +353,50 @@ entry:
|
|||
define void @vld2_v4i64(<8 x i64> *%src, <4 x i64> *%dst) {
|
||||
; CHECK-LABEL: vld2_v4i64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
|
||||
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vldrw.u32 q5, [r0, #16]
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0, #32]
|
||||
; CHECK-NEXT: vldrw.u32 q4, [r0, #48]
|
||||
; CHECK-NEXT: vmov.f64 d4, d1
|
||||
; CHECK-NEXT: vmov.f32 s9, s3
|
||||
; CHECK-NEXT: vmov.f32 s10, s22
|
||||
; CHECK-NEXT: vmov.f32 s2, s20
|
||||
; CHECK-NEXT: vmov.f32 s11, s23
|
||||
; CHECK-NEXT: vmov.f32 s3, s21
|
||||
; CHECK-NEXT: vmov r3, s10
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: vmov.f64 d6, d3
|
||||
; CHECK-NEXT: vmov r12, s11
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: vmov.f32 s13, s7
|
||||
; CHECK-NEXT: vmov.f32 s14, s18
|
||||
; CHECK-NEXT: vmov.f32 s6, s16
|
||||
; CHECK-NEXT: vmov.f32 s7, s17
|
||||
; CHECK-NEXT: vmov.f32 s15, s19
|
||||
; CHECK-NEXT: vmov r4, s6
|
||||
; CHECK-NEXT: vmov r5, s12
|
||||
; CHECK-NEXT: vmov r6, s4
|
||||
; CHECK-NEXT: adds.w lr, r0, r3
|
||||
; CHECK-NEXT: vmov r3, s14
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: adc.w r12, r12, r2
|
||||
; CHECK-NEXT: vmov r2, s15
|
||||
; CHECK-NEXT: adds r3, r3, r4
|
||||
; CHECK-NEXT: vmov r4, s5
|
||||
; CHECK-NEXT: adcs r0, r2
|
||||
; CHECK-NEXT: vmov r2, s13
|
||||
; CHECK-NEXT: adds r5, r5, r6
|
||||
; CHECK-NEXT: vmov r6, s0
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r5, r3
|
||||
; CHECK-NEXT: adcs r2, r4
|
||||
; CHECK-NEXT: vmov r4, s8
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r2, r0
|
||||
; CHECK-NEXT: vmov r0, s9
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: vstrw.32 q3, [r1, #16]
|
||||
; CHECK-NEXT: adds r4, r4, r6
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r4, lr
|
||||
; CHECK-NEXT: adcs r0, r2
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r0, r12
|
||||
; CHECK-NEXT: vstrw.32 q1, [r1]
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: pop {r4, r5, r6, pc}
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0, #16]
|
||||
; CHECK-NEXT: vldrw.u32 q3, [r0, #48]
|
||||
; CHECK-NEXT: vmov.f64 d2, d1
|
||||
; CHECK-NEXT: vmov.f32 s5, s3
|
||||
; CHECK-NEXT: vmov.f32 s6, s10
|
||||
; CHECK-NEXT: vmov.f32 s2, s8
|
||||
; CHECK-NEXT: vmov.f32 s7, s11
|
||||
; CHECK-NEXT: vmov.f32 s3, s9
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0, #32]
|
||||
; CHECK-NEXT: vmov.f64 d8, d5
|
||||
; CHECK-NEXT: vmov.f32 s17, s11
|
||||
; CHECK-NEXT: vmov.f32 s18, s14
|
||||
; CHECK-NEXT: vmov.f32 s10, s12
|
||||
; CHECK-NEXT: vmov lr, r12, d3
|
||||
; CHECK-NEXT: vmov r2, r3, d1
|
||||
; CHECK-NEXT: vmov.f32 s19, s15
|
||||
; CHECK-NEXT: vmov.f32 s11, s13
|
||||
; CHECK-NEXT: vmov r0, r7, d8
|
||||
; CHECK-NEXT: vmov r5, r6, d4
|
||||
; CHECK-NEXT: adds.w lr, lr, r2
|
||||
; CHECK-NEXT: adc.w r12, r12, r3
|
||||
; CHECK-NEXT: vmov r3, r4, d9
|
||||
; CHECK-NEXT: adds r0, r0, r5
|
||||
; CHECK-NEXT: adc.w r8, r6, r7
|
||||
; CHECK-NEXT: vmov r6, r5, d5
|
||||
; CHECK-NEXT: vmov r2, r7, d0
|
||||
; CHECK-NEXT: adds r3, r3, r6
|
||||
; CHECK-NEXT: adc.w r6, r5, r4
|
||||
; CHECK-NEXT: vmov r5, r4, d2
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r0, r3
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r8, r6
|
||||
; CHECK-NEXT: vstrw.32 q1, [r1, #16]
|
||||
; CHECK-NEXT: adds r2, r2, r5
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, lr
|
||||
; CHECK-NEXT: adc.w r0, r7, r4
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r0, r12
|
||||
; CHECK-NEXT: vstrw.32 q0, [r1]
|
||||
; CHECK-NEXT: vpop {d8, d9}
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
|
||||
entry:
|
||||
%l1 = load <8 x i64>, <8 x i64>* %src, align 8
|
||||
%s1 = shufflevector <8 x i64> %l1, <8 x i64> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
|
||||
|
@ -576,8 +564,7 @@ define void @vld2_v4f16(<8 x half> *%src, <4 x half> *%dst) {
|
|||
; CHECK-NEXT: vins.f16 s5, s8
|
||||
; CHECK-NEXT: vmov.f32 s1, s2
|
||||
; CHECK-NEXT: vadd.f16 q0, q0, q1
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: vmov r0, r2, d0
|
||||
; CHECK-NEXT: strd r0, r2, [r1]
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
|
|
|
@ -6,20 +6,21 @@
|
|||
define void @vld3_v2i32(<6 x i32> *%src, <2 x i32> *%dst) {
|
||||
; CHECK-LABEL: vld3_v2i32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: ldrd r12, r3, [r0, #16]
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: ldrd r2, r0, [r0, #16]
|
||||
; CHECK-NEXT: vmov.f64 d2, d0
|
||||
; CHECK-NEXT: vmov.f32 s6, s3
|
||||
; CHECK-NEXT: vmov r12, lr, d0
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: add r2, r3
|
||||
; CHECK-NEXT: add.w r3, r12, lr
|
||||
; CHECK-NEXT: add r0, r2
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: add r0, r2
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: add r2, r12
|
||||
; CHECK-NEXT: add r2, r3
|
||||
; CHECK-NEXT: strd r0, r2, [r1]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: strd r2, r0, [r1]
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
entry:
|
||||
%l1 = load <6 x i32>, <6 x i32>* %src, align 4
|
||||
%s1 = shufflevector <6 x i32> %l1, <6 x i32> undef, <2 x i32> <i32 0, i32 3>
|
||||
|
@ -250,30 +251,30 @@ entry:
|
|||
define void @vld3_v4i16(<12 x i16> *%src, <4 x i16> *%dst) {
|
||||
; CHECK-LABEL: vld3_v4i16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vldrh.u32 q3, [r0, #16]
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[6]
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[0]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[7]
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[1]
|
||||
; CHECK-NEXT: vmov r0, s14
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r3, r2
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[4]
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r2, r0
|
||||
; CHECK-NEXT: vmov r0, s13
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[3]
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r2, r0
|
||||
; CHECK-NEXT: vmov r0, s12
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[2]
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, q2
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r2, r0
|
||||
; CHECK-NEXT: vmov r0, s15
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[5]
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r2, r0
|
||||
; CHECK-NEXT: vldrh.u32 q1, [r0, #16]
|
||||
; CHECK-NEXT: vmov.u16 r5, q0[6]
|
||||
; CHECK-NEXT: vmov.u16 r6, q0[0]
|
||||
; CHECK-NEXT: vmov r0, r3, d2
|
||||
; CHECK-NEXT: vmov.u16 lr, q0[2]
|
||||
; CHECK-NEXT: vmov r2, r4, d3
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r6, r5
|
||||
; CHECK-NEXT: vmov.u16 r5, q0[7]
|
||||
; CHECK-NEXT: vmov.u16 r6, q0[1]
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r6, r5
|
||||
; CHECK-NEXT: vmov.u16 r5, q0[3]
|
||||
; CHECK-NEXT: vmov.u16 r6, q0[4]
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r5, r3
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r6, r2
|
||||
; CHECK-NEXT: vmov.u16 r12, q0[5]
|
||||
; CHECK-NEXT: vadd.i32 q0, q1, q2
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], lr, r0
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r12, r4
|
||||
; CHECK-NEXT: vadd.i32 q0, q0, q1
|
||||
; CHECK-NEXT: vstrh.32 q0, [r1]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: pop {r4, r5, r6, pc}
|
||||
entry:
|
||||
%l1 = load <12 x i16>, <12 x i16>* %src, align 4
|
||||
%s1 = shufflevector <12 x i16> %l1, <12 x i16> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
|
||||
|
@ -747,48 +748,37 @@ entry:
|
|||
define void @vld3_v2i64(<6 x i64> *%src, <2 x i64> *%dst) {
|
||||
; CHECK-LABEL: vld3_v2i64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, lr}
|
||||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
|
||||
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0, #32]
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0, #16]
|
||||
; CHECK-NEXT: vldrw.u32 q4, [r0, #32]
|
||||
; CHECK-NEXT: vmov.f64 d6, d3
|
||||
; CHECK-NEXT: vmov.f32 s13, s7
|
||||
; CHECK-NEXT: vmov.f32 s14, s16
|
||||
; CHECK-NEXT: vmov.f32 s6, s10
|
||||
; CHECK-NEXT: vmov.f32 s7, s11
|
||||
; CHECK-NEXT: vmov.f32 s15, s17
|
||||
; CHECK-NEXT: vmov r3, s14
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov.f64 d0, d4
|
||||
; CHECK-NEXT: vmov.f32 s1, s9
|
||||
; CHECK-NEXT: vmov.f32 s2, s18
|
||||
; CHECK-NEXT: vmov.f32 s3, s19
|
||||
; CHECK-NEXT: vmov r12, s15
|
||||
; CHECK-NEXT: vmov r2, s7
|
||||
; CHECK-NEXT: vmov r4, s4
|
||||
; CHECK-NEXT: adds.w lr, r0, r3
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: adc.w r2, r2, r12
|
||||
; CHECK-NEXT: adds.w lr, lr, r0
|
||||
; CHECK-NEXT: vmov r0, s12
|
||||
; CHECK-NEXT: adc.w r12, r2, r3
|
||||
; CHECK-NEXT: vmov r3, s13
|
||||
; CHECK-NEXT: vmov r2, s5
|
||||
; CHECK-NEXT: adds r0, r0, r4
|
||||
; CHECK-NEXT: vmov r4, s0
|
||||
; CHECK-NEXT: adcs r2, r3
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: adds r0, r0, r4
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, lr
|
||||
; CHECK-NEXT: adcs r2, r3
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r2, r12
|
||||
; CHECK-NEXT: vmov.f64 d6, d1
|
||||
; CHECK-NEXT: vmov.f32 s13, s3
|
||||
; CHECK-NEXT: vmov.f32 s14, s4
|
||||
; CHECK-NEXT: vmov.f32 s2, s10
|
||||
; CHECK-NEXT: vmov.f32 s3, s11
|
||||
; CHECK-NEXT: vmov.f32 s15, s5
|
||||
; CHECK-NEXT: vmov.f32 s10, s6
|
||||
; CHECK-NEXT: vmov.f32 s11, s7
|
||||
; CHECK-NEXT: vmov r5, r8, d6
|
||||
; CHECK-NEXT: vmov r6, r7, d0
|
||||
; CHECK-NEXT: vmov r0, r3, d1
|
||||
; CHECK-NEXT: vmov lr, r12, d7
|
||||
; CHECK-NEXT: vmov r2, r4, d5
|
||||
; CHECK-NEXT: adds.w r0, r0, lr
|
||||
; CHECK-NEXT: adc.w r3, r3, r12
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adc.w r2, r3, r4
|
||||
; CHECK-NEXT: vmov r3, r4, d4
|
||||
; CHECK-NEXT: adds r6, r6, r5
|
||||
; CHECK-NEXT: adc.w r7, r7, r8
|
||||
; CHECK-NEXT: adds r3, r3, r6
|
||||
; CHECK-NEXT: adcs r7, r4
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r3, r0
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r7, r2
|
||||
; CHECK-NEXT: vstrw.32 q0, [r1]
|
||||
; CHECK-NEXT: vpop {d8, d9}
|
||||
; CHECK-NEXT: pop {r4, pc}
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
|
||||
entry:
|
||||
%l1 = load <6 x i64>, <6 x i64>* %src, align 4
|
||||
%s1 = shufflevector <6 x i64> %l1, <6 x i64> undef, <2 x i32> <i32 0, i32 3>
|
||||
|
@ -805,86 +795,65 @@ define void @vld3_v4i64(<12 x i64> *%src, <4 x i64> *%dst) {
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
|
||||
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
|
||||
; CHECK-NEXT: .pad #24
|
||||
; CHECK-NEXT: sub sp, #24
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0, #64]
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0, #80]
|
||||
; CHECK-NEXT: vldrw.u32 q5, [r0]
|
||||
; CHECK-NEXT: vldrw.u32 q3, [r0, #16]
|
||||
; CHECK-NEXT: vmov.f64 d0, d4
|
||||
; CHECK-NEXT: vstrw.32 q2, [sp] @ 16-byte Spill
|
||||
; CHECK-NEXT: vmov.f32 s1, s9
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0, #48]
|
||||
; CHECK-NEXT: vmov.f32 s2, s6
|
||||
; CHECK-NEXT: vmov.f64 d8, d5
|
||||
; CHECK-NEXT: vmov.f32 s17, s11
|
||||
; CHECK-NEXT: vmov.f32 s18, s4
|
||||
; CHECK-NEXT: vmov.f32 s19, s5
|
||||
; CHECK-NEXT: vmov.f64 d12, d11
|
||||
; CHECK-NEXT: vmov.f32 s3, s7
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0, #32]
|
||||
; CHECK-NEXT: vmov.f32 s25, s23
|
||||
; CHECK-NEXT: vmov.f32 s26, s4
|
||||
; CHECK-NEXT: vmov.f32 s22, s14
|
||||
; CHECK-NEXT: vmov.f32 s27, s5
|
||||
; CHECK-NEXT: vmov.f32 s23, s15
|
||||
; CHECK-NEXT: vmov r3, s26
|
||||
; CHECK-NEXT: vmov r0, s22
|
||||
; CHECK-NEXT: vmov.f64 d14, d6
|
||||
; CHECK-NEXT: vmov r12, s27
|
||||
; CHECK-NEXT: vmov r2, s23
|
||||
; CHECK-NEXT: vmov.f32 s29, s13
|
||||
; CHECK-NEXT: vmov.f32 s30, s6
|
||||
; CHECK-NEXT: vmov.f32 s31, s7
|
||||
; CHECK-NEXT: vldrw.u32 q1, [sp] @ 16-byte Reload
|
||||
; CHECK-NEXT: vmov r5, s16
|
||||
; CHECK-NEXT: vmov.f32 s10, s6
|
||||
; CHECK-NEXT: vmov.f32 s11, s7
|
||||
; CHECK-NEXT: vmov r4, s10
|
||||
; CHECK-NEXT: vmov r6, s8
|
||||
; CHECK-NEXT: vmov r7, s24
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vldrw.u32 q3, [r0, #32]
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0, #16]
|
||||
; CHECK-NEXT: vldrw.u32 q6, [r0, #80]
|
||||
; CHECK-NEXT: vmov.f64 d2, d1
|
||||
; CHECK-NEXT: vldrw.u32 q4, [r0, #64]
|
||||
; CHECK-NEXT: vmov.f32 s5, s3
|
||||
; CHECK-NEXT: vmov.f32 s6, s12
|
||||
; CHECK-NEXT: vmov.f32 s2, s10
|
||||
; CHECK-NEXT: vmov.f32 s3, s11
|
||||
; CHECK-NEXT: vmov.f32 s10, s14
|
||||
; CHECK-NEXT: vmov.f32 s7, s13
|
||||
; CHECK-NEXT: vmov.f32 s11, s15
|
||||
; CHECK-NEXT: vldrw.u32 q3, [r0, #48]
|
||||
; CHECK-NEXT: vmov.f64 d10, d7
|
||||
; CHECK-NEXT: vmov lr, r12, d3
|
||||
; CHECK-NEXT: vmov r5, r4, d1
|
||||
; CHECK-NEXT: vmov r3, r8, d5
|
||||
; CHECK-NEXT: vmov.f32 s21, s15
|
||||
; CHECK-NEXT: vmov.f32 s22, s24
|
||||
; CHECK-NEXT: vmov.f32 s14, s18
|
||||
; CHECK-NEXT: vmov.f32 s23, s25
|
||||
; CHECK-NEXT: vmov.f32 s15, s19
|
||||
; CHECK-NEXT: vmov.f32 s18, s26
|
||||
; CHECK-NEXT: vmov r6, r7, d10
|
||||
; CHECK-NEXT: vmov.f32 s19, s27
|
||||
; CHECK-NEXT: adds.w r0, r5, lr
|
||||
; CHECK-NEXT: adc.w r5, r4, r12
|
||||
; CHECK-NEXT: adds.w lr, r0, r3
|
||||
; CHECK-NEXT: vmov r0, s30
|
||||
; CHECK-NEXT: adc.w r3, r2, r12
|
||||
; CHECK-NEXT: vmov r2, s31
|
||||
; CHECK-NEXT: adds.w lr, lr, r0
|
||||
; CHECK-NEXT: vmov r0, s18
|
||||
; CHECK-NEXT: adc.w r12, r3, r2
|
||||
; CHECK-NEXT: vmov r3, s19
|
||||
; CHECK-NEXT: vmov r2, s11
|
||||
; CHECK-NEXT: adds r0, r0, r4
|
||||
; CHECK-NEXT: vmov r4, s2
|
||||
; CHECK-NEXT: adcs r2, r3
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: adds r0, r0, r4
|
||||
; CHECK-NEXT: vmov r4, s9
|
||||
; CHECK-NEXT: adc.w r8, r2, r3
|
||||
; CHECK-NEXT: vmov r3, s17
|
||||
; CHECK-NEXT: adds r5, r5, r6
|
||||
; CHECK-NEXT: vmov r6, s0
|
||||
; CHECK-NEXT: vmov r2, s20
|
||||
; CHECK-NEXT: adcs r3, r4
|
||||
; CHECK-NEXT: vmov r4, s1
|
||||
; CHECK-NEXT: adds r5, r5, r6
|
||||
; CHECK-NEXT: vmov r6, s21
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r5, r0
|
||||
; CHECK-NEXT: vmov r0, s29
|
||||
; CHECK-NEXT: adcs r3, r4
|
||||
; CHECK-NEXT: vmov r4, s25
|
||||
; CHECK-NEXT: vmov r4, r2, d6
|
||||
; CHECK-NEXT: adc.w r12, r5, r8
|
||||
; CHECK-NEXT: vmov r5, r0, d8
|
||||
; CHECK-NEXT: adds r6, r6, r4
|
||||
; CHECK-NEXT: adcs r2, r7
|
||||
; CHECK-NEXT: adds r6, r6, r5
|
||||
; CHECK-NEXT: adc.w r8, r2, r0
|
||||
; CHECK-NEXT: vmov r7, r4, d11
|
||||
; CHECK-NEXT: vmov r2, r5, d7
|
||||
; CHECK-NEXT: vmov r3, r0, d0
|
||||
; CHECK-NEXT: adds r2, r2, r7
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r3, r8
|
||||
; CHECK-NEXT: adc.w r7, r5, r4
|
||||
; CHECK-NEXT: vmov r5, r4, d9
|
||||
; CHECK-NEXT: adds r2, r2, r5
|
||||
; CHECK-NEXT: adcs r7, r4
|
||||
; CHECK-NEXT: vmov r5, r4, d2
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r6, r2
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r8, r7
|
||||
; CHECK-NEXT: vstrw.32 q1, [r1, #16]
|
||||
; CHECK-NEXT: adcs r4, r6
|
||||
; CHECK-NEXT: vmov r6, s28
|
||||
; CHECK-NEXT: adds r2, r2, r6
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, lr
|
||||
; CHECK-NEXT: adds r3, r3, r5
|
||||
; CHECK-NEXT: adcs r0, r4
|
||||
; CHECK-NEXT: vmov r4, r5, d4
|
||||
; CHECK-NEXT: adds r3, r3, r4
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r3, lr
|
||||
; CHECK-NEXT: adcs r0, r5
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r0, r12
|
||||
; CHECK-NEXT: vstrw.32 q0, [r1]
|
||||
; CHECK-NEXT: add sp, #24
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
|
||||
entry:
|
||||
%l1 = load <12 x i64>, <12 x i64>* %src, align 4
|
||||
|
@ -1133,8 +1102,8 @@ entry:
|
|||
define void @vld3_v4f16(<12 x half> *%src, <4 x half> *%dst) {
|
||||
; CHECK-LABEL: vld3_v4f16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: .vsave {d8}
|
||||
; CHECK-NEXT: vpush {d8}
|
||||
; CHECK-NEXT: ldrd r2, r3, [r0, #16]
|
||||
; CHECK-NEXT: vmov.32 q2[0], r2
|
||||
; CHECK-NEXT: vmov.32 q2[1], r3
|
||||
|
@ -1144,11 +1113,11 @@ define void @vld3_v4f16(<12 x half> *%src, <4 x half> *%dst) {
|
|||
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
||||
; CHECK-NEXT: vmovx.f16 s8, s8
|
||||
; CHECK-NEXT: vmovx.f16 s12, s4
|
||||
; CHECK-NEXT: vmovx.f16 s16, s6
|
||||
; CHECK-NEXT: vmovx.f16 s16, s5
|
||||
; CHECK-NEXT: vins.f16 s12, s6
|
||||
; CHECK-NEXT: vmovx.f16 s18, s5
|
||||
; CHECK-NEXT: vins.f16 s4, s16
|
||||
; CHECK-NEXT: vmovx.f16 s16, s6
|
||||
; CHECK-NEXT: vins.f16 s5, s16
|
||||
; CHECK-NEXT: vins.f16 s4, s18
|
||||
; CHECK-NEXT: vmovx.f16 s13, s7
|
||||
; CHECK-NEXT: vins.f16 s7, s8
|
||||
; CHECK-NEXT: vmov.f32 s0, s5
|
||||
|
@ -1156,10 +1125,9 @@ define void @vld3_v4f16(<12 x half> *%src, <4 x half> *%dst) {
|
|||
; CHECK-NEXT: vmov.f32 s5, s7
|
||||
; CHECK-NEXT: vadd.f16 q1, q1, q3
|
||||
; CHECK-NEXT: vadd.f16 q0, q1, q0
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: vmov r0, r2, d0
|
||||
; CHECK-NEXT: strd r0, r2, [r1]
|
||||
; CHECK-NEXT: vpop {d8, d9}
|
||||
; CHECK-NEXT: vpop {d8}
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%l1 = load <12 x half>, <12 x half>* %src, align 4
|
||||
|
|
|
@ -102,62 +102,51 @@ entry:
|
|||
define <8 x i64> *@vld4_v2i64(<8 x i64> *%src, <2 x i64> *%dst) {
|
||||
; CHECK-LABEL: vld4_v2i64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: .pad #4
|
||||
; CHECK-NEXT: sub sp, #4
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0, #32]
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0], #64
|
||||
; CHECK-NEXT: vldrw.u32 q3, [r0, #-48]
|
||||
; CHECK-NEXT: vldrw.u32 q5, [r0, #-16]
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
|
||||
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0, #16]
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0, #48]
|
||||
; CHECK-NEXT: vldrw.u32 q4, [r0, #32]
|
||||
; CHECK-NEXT: vmov.f64 d2, d1
|
||||
; CHECK-NEXT: vmov.f64 d8, d7
|
||||
; CHECK-NEXT: vmov.f32 s17, s15
|
||||
; CHECK-NEXT: vmov.f32 s18, s22
|
||||
; CHECK-NEXT: vmov.f32 s14, s20
|
||||
; CHECK-NEXT: vmov.f32 s19, s23
|
||||
; CHECK-NEXT: vmov.f32 s15, s21
|
||||
; CHECK-NEXT: vmov r2, s18
|
||||
; CHECK-NEXT: vmov r3, s14
|
||||
; CHECK-NEXT: vmov.f32 s5, s3
|
||||
; CHECK-NEXT: vmov.f32 s6, s10
|
||||
; CHECK-NEXT: vmov.f32 s2, s8
|
||||
; CHECK-NEXT: vmov.f32 s3, s9
|
||||
; CHECK-NEXT: vmov.f32 s7, s11
|
||||
; CHECK-NEXT: vmov r12, s19
|
||||
; CHECK-NEXT: vmov lr, s15
|
||||
; CHECK-NEXT: vmov r4, s6
|
||||
; CHECK-NEXT: vmov r5, s2
|
||||
; CHECK-NEXT: vmov r7, s0
|
||||
; CHECK-NEXT: adds r6, r3, r2
|
||||
; CHECK-NEXT: vmov r2, s7
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: adc.w r12, r12, lr
|
||||
; CHECK-NEXT: adds r5, r5, r4
|
||||
; CHECK-NEXT: vmov r4, s16
|
||||
; CHECK-NEXT: adcs r2, r3
|
||||
; CHECK-NEXT: adds.w lr, r5, r6
|
||||
; CHECK-NEXT: adc.w r12, r12, r2
|
||||
; CHECK-NEXT: vmov r2, s12
|
||||
; CHECK-NEXT: vmov r6, s17
|
||||
; CHECK-NEXT: vmov r5, s13
|
||||
; CHECK-NEXT: vmov r3, s4
|
||||
; CHECK-NEXT: adds r2, r2, r4
|
||||
; CHECK-NEXT: vmov r4, s1
|
||||
; CHECK-NEXT: adcs r6, r5
|
||||
; CHECK-NEXT: vmov r5, s5
|
||||
; CHECK-NEXT: adds r3, r3, r7
|
||||
; CHECK-NEXT: adcs r4, r5
|
||||
; CHECK-NEXT: adds r2, r2, r3
|
||||
; CHECK-NEXT: adc.w r3, r4, r6
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, lr
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r12
|
||||
; CHECK-NEXT: vmov.f32 s3, s9
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0], #64
|
||||
; CHECK-NEXT: vmov.f64 d6, d5
|
||||
; CHECK-NEXT: vmov.f32 s13, s11
|
||||
; CHECK-NEXT: vmov.f32 s14, s18
|
||||
; CHECK-NEXT: vmov.f32 s10, s16
|
||||
; CHECK-NEXT: vmov.f32 s15, s19
|
||||
; CHECK-NEXT: vmov.f32 s11, s17
|
||||
; CHECK-NEXT: vmov lr, r12, d3
|
||||
; CHECK-NEXT: vmov r2, r7, d1
|
||||
; CHECK-NEXT: vmov r4, r8, d7
|
||||
; CHECK-NEXT: vmov r3, r6, d5
|
||||
; CHECK-NEXT: adds.w r2, r2, lr
|
||||
; CHECK-NEXT: adc.w r7, r7, r12
|
||||
; CHECK-NEXT: adds r3, r3, r4
|
||||
; CHECK-NEXT: vmov r4, r5, d2
|
||||
; CHECK-NEXT: adc.w r6, r6, r8
|
||||
; CHECK-NEXT: adds.w r12, r3, r2
|
||||
; CHECK-NEXT: vmov r3, r2, d0
|
||||
; CHECK-NEXT: adc.w lr, r6, r7
|
||||
; CHECK-NEXT: adds r3, r3, r4
|
||||
; CHECK-NEXT: vmov r6, r4, d6
|
||||
; CHECK-NEXT: adcs r2, r5
|
||||
; CHECK-NEXT: vmov r5, r7, d4
|
||||
; CHECK-NEXT: adds r5, r5, r6
|
||||
; CHECK-NEXT: adcs r4, r7
|
||||
; CHECK-NEXT: adds r3, r3, r5
|
||||
; CHECK-NEXT: adcs r2, r4
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r3, r12
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r2, lr
|
||||
; CHECK-NEXT: vstrw.32 q0, [r1]
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: add sp, #4
|
||||
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
|
||||
; CHECK-NEXT: vpop {d8, d9}
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
|
||||
entry:
|
||||
%l1 = load <8 x i64>, <8 x i64>* %src, align 4
|
||||
%s1 = shufflevector <8 x i64> %l1, <8 x i64> undef, <2 x i32> <i32 0, i32 4>
|
||||
|
|
|
@ -644,59 +644,51 @@ entry:
|
|||
define void @vld4_v2i64(<8 x i64> *%src, <2 x i64> *%dst) {
|
||||
; CHECK-LABEL: vld4_v2i64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vldrw.u32 q3, [r0, #16]
|
||||
; CHECK-NEXT: vldrw.u32 q5, [r0, #48]
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0, #32]
|
||||
; CHECK-NEXT: vmov.f64 d8, d7
|
||||
; CHECK-NEXT: vmov.f32 s17, s15
|
||||
; CHECK-NEXT: vmov.f32 s18, s22
|
||||
; CHECK-NEXT: vmov.f32 s14, s20
|
||||
; CHECK-NEXT: vmov.f32 s19, s23
|
||||
; CHECK-NEXT: vmov.f32 s15, s21
|
||||
; CHECK-NEXT: vmov r3, s18
|
||||
; CHECK-NEXT: vmov r0, s14
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
|
||||
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0, #16]
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0, #48]
|
||||
; CHECK-NEXT: vldrw.u32 q4, [r0, #32]
|
||||
; CHECK-NEXT: vmov.f64 d2, d1
|
||||
; CHECK-NEXT: vmov r12, s19
|
||||
; CHECK-NEXT: vmov r2, s15
|
||||
; CHECK-NEXT: vmov.f32 s5, s3
|
||||
; CHECK-NEXT: vmov.f32 s6, s10
|
||||
; CHECK-NEXT: vmov.f32 s2, s8
|
||||
; CHECK-NEXT: vmov.f32 s3, s9
|
||||
; CHECK-NEXT: vmov.f32 s7, s11
|
||||
; CHECK-NEXT: vmov r4, s2
|
||||
; CHECK-NEXT: vmov r5, s4
|
||||
; CHECK-NEXT: vmov r6, s0
|
||||
; CHECK-NEXT: adds.w lr, r0, r3
|
||||
; CHECK-NEXT: vmov r3, s7
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: adc.w r12, r12, r2
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: adds r2, r2, r4
|
||||
; CHECK-NEXT: vmov r4, s13
|
||||
; CHECK-NEXT: adcs r0, r3
|
||||
; CHECK-NEXT: adds.w lr, lr, r2
|
||||
; CHECK-NEXT: adc.w r12, r12, r0
|
||||
; CHECK-NEXT: vmov r0, s16
|
||||
; CHECK-NEXT: vmov r2, s12
|
||||
; CHECK-NEXT: vmov r3, s17
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adc.w r2, r4, r3
|
||||
; CHECK-NEXT: vmov r3, s5
|
||||
; CHECK-NEXT: vmov r4, s1
|
||||
; CHECK-NEXT: adds r5, r5, r6
|
||||
; CHECK-NEXT: adcs r3, r4
|
||||
; CHECK-NEXT: vmov.f32 s3, s9
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0]
|
||||
; CHECK-NEXT: vmov.f64 d6, d5
|
||||
; CHECK-NEXT: vmov.f32 s13, s11
|
||||
; CHECK-NEXT: vmov.f32 s14, s18
|
||||
; CHECK-NEXT: vmov.f32 s10, s16
|
||||
; CHECK-NEXT: vmov.f32 s15, s19
|
||||
; CHECK-NEXT: vmov.f32 s11, s17
|
||||
; CHECK-NEXT: vmov lr, r12, d3
|
||||
; CHECK-NEXT: vmov r2, r3, d1
|
||||
; CHECK-NEXT: vmov r0, r8, d7
|
||||
; CHECK-NEXT: vmov r5, r6, d5
|
||||
; CHECK-NEXT: adds.w r2, r2, lr
|
||||
; CHECK-NEXT: adc.w r3, r3, r12
|
||||
; CHECK-NEXT: vmov r4, r12, d2
|
||||
; CHECK-NEXT: adds r0, r0, r5
|
||||
; CHECK-NEXT: adcs r2, r3
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, lr
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r2, r12
|
||||
; CHECK-NEXT: vmov r5, r7, d0
|
||||
; CHECK-NEXT: adc.w r6, r6, r8
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adc.w lr, r6, r3
|
||||
; CHECK-NEXT: vmov r3, r6, d6
|
||||
; CHECK-NEXT: adds r5, r5, r4
|
||||
; CHECK-NEXT: vmov r4, r2, d4
|
||||
; CHECK-NEXT: adc.w r7, r7, r12
|
||||
; CHECK-NEXT: adds r3, r3, r4
|
||||
; CHECK-NEXT: adcs r2, r6
|
||||
; CHECK-NEXT: adds r3, r3, r5
|
||||
; CHECK-NEXT: adcs r2, r7
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r3, r0
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r2, lr
|
||||
; CHECK-NEXT: vstrw.32 q0, [r1]
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: pop {r4, r5, r6, pc}
|
||||
; CHECK-NEXT: vpop {d8, d9}
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
|
||||
entry:
|
||||
%l1 = load <8 x i64>, <8 x i64>* %src, align 8
|
||||
%s1 = shufflevector <8 x i64> %l1, <8 x i64> undef, <2 x i32> <i32 0, i32 4>
|
||||
|
@ -717,112 +709,90 @@ define void @vld4_v4i64(<16 x i64> *%src, <4 x i64> *%dst) {
|
|||
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
|
||||
; CHECK-NEXT: .pad #64
|
||||
; CHECK-NEXT: sub sp, #64
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0, #64]
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0, #96]
|
||||
; CHECK-NEXT: vldrw.u32 q4, [r0, #80]
|
||||
; CHECK-NEXT: vldrw.u32 q6, [r0, #16]
|
||||
; CHECK-NEXT: vmov.f64 d4, d3
|
||||
; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill
|
||||
; CHECK-NEXT: vldrw.u32 q3, [r0]
|
||||
; CHECK-NEXT: vstrw.32 q1, [sp, #32] @ 16-byte Spill
|
||||
; CHECK-NEXT: vmov.f32 s9, s7
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0, #32]
|
||||
; CHECK-NEXT: vmov.f32 s10, s2
|
||||
; CHECK-NEXT: vmov.f32 s11, s3
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0, #112]
|
||||
; CHECK-NEXT: vmov.f64 d14, d9
|
||||
; CHECK-NEXT: vstrw.32 q2, [sp, #48] @ 16-byte Spill
|
||||
; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill
|
||||
; CHECK-NEXT: vmov.f32 s29, s19
|
||||
; CHECK-NEXT: vmov.f32 s30, s2
|
||||
; CHECK-NEXT: vmov.f64 d4, d13
|
||||
; CHECK-NEXT: vmov.f32 s31, s3
|
||||
; CHECK-NEXT: .pad #16
|
||||
; CHECK-NEXT: sub sp, #16
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0, #16]
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0, #48]
|
||||
; CHECK-NEXT: vmov.f32 s9, s27
|
||||
; CHECK-NEXT: vmov.f32 s10, s2
|
||||
; CHECK-NEXT: vmov.f32 s26, s0
|
||||
; CHECK-NEXT: vmov.f32 s11, s3
|
||||
; CHECK-NEXT: vmov.f32 s27, s1
|
||||
; CHECK-NEXT: vldrw.u32 q0, [sp] @ 16-byte Reload
|
||||
; CHECK-NEXT: vmov r3, s10
|
||||
; CHECK-NEXT: vmov r0, s26
|
||||
; CHECK-NEXT: vmov.f64 d10, d7
|
||||
; CHECK-NEXT: vmov r12, s11
|
||||
; CHECK-NEXT: vmov r2, s27
|
||||
; CHECK-NEXT: vmov.f32 s21, s15
|
||||
; CHECK-NEXT: vmov.f32 s22, s6
|
||||
; CHECK-NEXT: vmov.f32 s14, s4
|
||||
; CHECK-NEXT: vmov.f32 s15, s5
|
||||
; CHECK-NEXT: vmov.f32 s23, s7
|
||||
; CHECK-NEXT: vldrw.u32 q1, [sp, #16] @ 16-byte Reload
|
||||
; CHECK-NEXT: vmov r4, s14
|
||||
; CHECK-NEXT: vldrw.u32 q5, [r0]
|
||||
; CHECK-NEXT: vldrw.u32 q4, [r0, #32]
|
||||
; CHECK-NEXT: vmov.f64 d6, d3
|
||||
; CHECK-NEXT: vldrw.u32 q6, [r0, #112]
|
||||
; CHECK-NEXT: vmov.f32 s13, s7
|
||||
; CHECK-NEXT: vmov.f32 s14, s2
|
||||
; CHECK-NEXT: vmov.f32 s6, s0
|
||||
; CHECK-NEXT: vmov.f32 s15, s3
|
||||
; CHECK-NEXT: vmov.f32 s7, s1
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0, #96]
|
||||
; CHECK-NEXT: vmov.f64 d4, d11
|
||||
; CHECK-NEXT: vmov.f32 s9, s23
|
||||
; CHECK-NEXT: vmov r3, r2, d7
|
||||
; CHECK-NEXT: vmov r4, r5, d3
|
||||
; CHECK-NEXT: vmov.f32 s10, s18
|
||||
; CHECK-NEXT: vmov.f32 s11, s19
|
||||
; CHECK-NEXT: vmov.f32 s22, s16
|
||||
; CHECK-NEXT: vmov.f32 s23, s17
|
||||
; CHECK-NEXT: vldrw.u32 q4, [r0, #64]
|
||||
; CHECK-NEXT: vmov q7, q5
|
||||
; CHECK-NEXT: vstrw.32 q5, [sp] @ 16-byte Spill
|
||||
; CHECK-NEXT: vldrw.u32 q5, [r0, #80]
|
||||
; CHECK-NEXT: vmov r0, r6, d15
|
||||
; CHECK-NEXT: vmov.f64 d14, d11
|
||||
; CHECK-NEXT: vmov.f32 s29, s23
|
||||
; CHECK-NEXT: vmov lr, r12, d5
|
||||
; CHECK-NEXT: vmov.f32 s30, s26
|
||||
; CHECK-NEXT: vmov.f32 s22, s24
|
||||
; CHECK-NEXT: vmov.f32 s31, s27
|
||||
; CHECK-NEXT: vmov.f32 s23, s25
|
||||
; CHECK-NEXT: vmov.f64 d12, d9
|
||||
; CHECK-NEXT: adds r7, r4, r3
|
||||
; CHECK-NEXT: adcs r5, r2
|
||||
; CHECK-NEXT: vmov r4, r8, d14
|
||||
; CHECK-NEXT: vmov r2, r3, d10
|
||||
; CHECK-NEXT: vmov.f32 s25, s19
|
||||
; CHECK-NEXT: vmov.f32 s26, s2
|
||||
; CHECK-NEXT: vmov.f32 s18, s0
|
||||
; CHECK-NEXT: vmov.f32 s27, s3
|
||||
; CHECK-NEXT: vmov.f32 s19, s1
|
||||
; CHECK-NEXT: vldrw.u32 q0, [sp, #48] @ 16-byte Reload
|
||||
; CHECK-NEXT: adds.w lr, r0, r3
|
||||
; CHECK-NEXT: vmov r3, s22
|
||||
; CHECK-NEXT: vmov r0, s15
|
||||
; CHECK-NEXT: adc.w r12, r12, r2
|
||||
; CHECK-NEXT: vmov r2, s23
|
||||
; CHECK-NEXT: adds r3, r3, r4
|
||||
; CHECK-NEXT: vmov r4, s28
|
||||
; CHECK-NEXT: adcs r0, r2
|
||||
; CHECK-NEXT: adds.w lr, lr, r3
|
||||
; CHECK-NEXT: vmov r2, s16
|
||||
; CHECK-NEXT: adc.w r12, r12, r0
|
||||
; CHECK-NEXT: vmov r0, s29
|
||||
; CHECK-NEXT: vmov r3, s17
|
||||
; CHECK-NEXT: vldrw.u32 q0, [sp] @ 16-byte Reload
|
||||
; CHECK-NEXT: adds.w r0, r0, lr
|
||||
; CHECK-NEXT: adc.w r6, r6, r12
|
||||
; CHECK-NEXT: adds.w lr, r0, r7
|
||||
; CHECK-NEXT: adc.w r12, r6, r5
|
||||
; CHECK-NEXT: vmov r6, r5, d12
|
||||
; CHECK-NEXT: adds r2, r2, r4
|
||||
; CHECK-NEXT: adcs r3, r0
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: vldrw.u32 q0, [sp, #32] @ 16-byte Reload
|
||||
; CHECK-NEXT: vmov.f32 s2, s4
|
||||
; CHECK-NEXT: vmov.f32 s3, s5
|
||||
; CHECK-NEXT: vldrw.u32 q1, [sp, #48] @ 16-byte Reload
|
||||
; CHECK-NEXT: vmov r6, s0
|
||||
; CHECK-NEXT: vmov r5, s4
|
||||
; CHECK-NEXT: vmov r4, s1
|
||||
; CHECK-NEXT: vmov r7, s6
|
||||
; CHECK-NEXT: adds r5, r5, r6
|
||||
; CHECK-NEXT: vmov r6, s18
|
||||
; CHECK-NEXT: adcs r4, r0
|
||||
; CHECK-NEXT: adds.w r9, r5, r2
|
||||
; CHECK-NEXT: vmov r5, s30
|
||||
; CHECK-NEXT: adc.w r8, r4, r3
|
||||
; CHECK-NEXT: vmov r2, s31
|
||||
; CHECK-NEXT: vmov r4, s19
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: vmov r0, s8
|
||||
; CHECK-NEXT: adds r5, r5, r6
|
||||
; CHECK-NEXT: vmov r6, s3
|
||||
; CHECK-NEXT: adcs r2, r4
|
||||
; CHECK-NEXT: vmov r4, s7
|
||||
; CHECK-NEXT: adds r3, r3, r7
|
||||
; CHECK-NEXT: vmov r7, s12
|
||||
; CHECK-NEXT: adcs r4, r6
|
||||
; CHECK-NEXT: vmov r4, r0, d8
|
||||
; CHECK-NEXT: adc.w r3, r3, r8
|
||||
; CHECK-NEXT: adds r6, r6, r4
|
||||
; CHECK-NEXT: adcs r0, r5
|
||||
; CHECK-NEXT: adds.w r9, r6, r2
|
||||
; CHECK-NEXT: adc.w r8, r0, r3
|
||||
; CHECK-NEXT: vmov r5, r4, d15
|
||||
; CHECK-NEXT: vmov r3, r6, d11
|
||||
; CHECK-NEXT: vmov r7, r0, d9
|
||||
; CHECK-NEXT: adds r3, r3, r5
|
||||
; CHECK-NEXT: vmov r6, s20
|
||||
; CHECK-NEXT: adc.w r10, r4, r2
|
||||
; CHECK-NEXT: vmov r4, s21
|
||||
; CHECK-NEXT: adcs r6, r4
|
||||
; CHECK-NEXT: vmov r5, r4, d13
|
||||
; CHECK-NEXT: adds r5, r5, r7
|
||||
; CHECK-NEXT: adcs r0, r4
|
||||
; CHECK-NEXT: adds r3, r3, r5
|
||||
; CHECK-NEXT: adc.w r10, r0, r6
|
||||
; CHECK-NEXT: vmov r4, r5, d4
|
||||
; CHECK-NEXT: vmov r6, r7, d0
|
||||
; CHECK-NEXT: vmov r2, r0, d2
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r9, r3
|
||||
; CHECK-NEXT: vmov r5, s13
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r8, r10
|
||||
; CHECK-NEXT: vmov r2, s24
|
||||
; CHECK-NEXT: vstrw.32 q1, [r1, #16]
|
||||
; CHECK-NEXT: adds r6, r6, r7
|
||||
; CHECK-NEXT: vmov r7, s25
|
||||
; CHECK-NEXT: adcs r4, r5
|
||||
; CHECK-NEXT: vmov r5, s9
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adc.w r2, r7, r5
|
||||
; CHECK-NEXT: adds r0, r0, r6
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, lr
|
||||
; CHECK-NEXT: adc.w r0, r4, r2
|
||||
; CHECK-NEXT: adds r4, r4, r6
|
||||
; CHECK-NEXT: adcs r5, r7
|
||||
; CHECK-NEXT: vmov r6, r7, d6
|
||||
; CHECK-NEXT: adds r2, r2, r6
|
||||
; CHECK-NEXT: adcs r0, r7
|
||||
; CHECK-NEXT: adds r2, r2, r4
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, lr
|
||||
; CHECK-NEXT: adcs r0, r5
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r0, r12
|
||||
; CHECK-NEXT: vstrw.32 q0, [r1]
|
||||
; CHECK-NEXT: add sp, #64
|
||||
; CHECK-NEXT: add sp, #16
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc}
|
||||
entry:
|
||||
|
@ -1101,31 +1071,30 @@ define void @vld4_v4f16(<16 x half> *%src, <4 x half> *%dst) {
|
|||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vldrh.u16 q0, [r0]
|
||||
; CHECK-NEXT: vmovx.f16 s4, s3
|
||||
; CHECK-NEXT: vmovx.f16 s8, s1
|
||||
; CHECK-NEXT: vins.f16 s8, s4
|
||||
; CHECK-NEXT: vldrh.u16 q1, [r0, #16]
|
||||
; CHECK-NEXT: vins.f16 s1, s3
|
||||
; CHECK-NEXT: vmovx.f16 s16, s2
|
||||
; CHECK-NEXT: vmovx.f16 s12, s7
|
||||
; CHECK-NEXT: vmovx.f16 s9, s5
|
||||
; CHECK-NEXT: vins.f16 s9, s12
|
||||
; CHECK-NEXT: vmovx.f16 s12, s0
|
||||
; CHECK-NEXT: vins.f16 s12, s16
|
||||
; CHECK-NEXT: vins.f16 s5, s7
|
||||
; CHECK-NEXT: vmovx.f16 s16, s6
|
||||
; CHECK-NEXT: vmovx.f16 s13, s4
|
||||
; CHECK-NEXT: vins.f16 s13, s16
|
||||
; CHECK-NEXT: vmovx.f16 s8, s2
|
||||
; CHECK-NEXT: vmovx.f16 s4, s0
|
||||
; CHECK-NEXT: vins.f16 s4, s8
|
||||
; CHECK-NEXT: vldrh.u16 q2, [r0, #16]
|
||||
; CHECK-NEXT: vins.f16 s0, s2
|
||||
; CHECK-NEXT: vins.f16 s4, s6
|
||||
; CHECK-NEXT: vmovx.f16 s16, s3
|
||||
; CHECK-NEXT: vmovx.f16 s12, s10
|
||||
; CHECK-NEXT: vmovx.f16 s5, s8
|
||||
; CHECK-NEXT: vins.f16 s5, s12
|
||||
; CHECK-NEXT: vmovx.f16 s12, s1
|
||||
; CHECK-NEXT: vins.f16 s12, s16
|
||||
; CHECK-NEXT: vins.f16 s8, s10
|
||||
; CHECK-NEXT: vmovx.f16 s16, s11
|
||||
; CHECK-NEXT: vmovx.f16 s13, s9
|
||||
; CHECK-NEXT: vins.f16 s1, s3
|
||||
; CHECK-NEXT: vins.f16 s13, s16
|
||||
; CHECK-NEXT: vins.f16 s9, s11
|
||||
; CHECK-NEXT: vmov.f32 s16, s1
|
||||
; CHECK-NEXT: vmov.f32 s1, s4
|
||||
; CHECK-NEXT: vmov.f32 s17, s5
|
||||
; CHECK-NEXT: vmov.f32 s1, s8
|
||||
; CHECK-NEXT: vmov.f32 s17, s9
|
||||
; CHECK-NEXT: vadd.f16 q0, q0, q1
|
||||
; CHECK-NEXT: vadd.f16 q3, q4, q3
|
||||
; CHECK-NEXT: vadd.f16 q0, q0, q3
|
||||
; CHECK-NEXT: vadd.f16 q2, q4, q2
|
||||
; CHECK-NEXT: vadd.f16 q0, q0, q2
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: vmov r0, r2, d0
|
||||
; CHECK-NEXT: strd r0, r2, [r1]
|
||||
; CHECK-NEXT: vpop {d8, d9}
|
||||
; CHECK-NEXT: bx lr
|
||||
|
|
|
@ -495,19 +495,16 @@ define arm_aapcs_vfpcc i64 @uminv2i64(<2 x i64> %vec, i64 %min) {
|
|||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vmov r12, s3
|
||||
; CHECK-NEXT: vmov lr, s1
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: vmov r2, r12, d1
|
||||
; CHECK-NEXT: vmov r3, lr, d0
|
||||
; CHECK-NEXT: cmp r3, r2
|
||||
; CHECK-NEXT: csel r4, r3, r2, lo
|
||||
; CHECK-NEXT: cmp lr, r12
|
||||
; CHECK-NEXT: csel r4, r2, r3, lo
|
||||
; CHECK-NEXT: cmp r2, r3
|
||||
; CHECK-NEXT: csel r2, r2, r3, lo
|
||||
; CHECK-NEXT: cmp lr, r12
|
||||
; CHECK-NEXT: csel r5, r2, r4, eq
|
||||
; CHECK-NEXT: csel r2, r3, r2, lo
|
||||
; CHECK-NEXT: csel r3, lr, r12, lo
|
||||
; CHECK-NEXT: csel r5, r4, r2, eq
|
||||
; CHECK-NEXT: movs r4, #0
|
||||
; CHECK-NEXT: subs r2, r5, r0
|
||||
; CHECK-NEXT: mov.w r4, #0
|
||||
; CHECK-NEXT: sbcs.w r2, r3, r1
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r4, #1
|
||||
|
@ -526,19 +523,16 @@ define arm_aapcs_vfpcc i64 @sminv2i64(<2 x i64> %vec, i64 %min) {
|
|||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vmov r12, s3
|
||||
; CHECK-NEXT: vmov lr, s1
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: vmov r2, r12, d1
|
||||
; CHECK-NEXT: vmov r3, lr, d0
|
||||
; CHECK-NEXT: cmp r3, r2
|
||||
; CHECK-NEXT: csel r4, r3, r2, lo
|
||||
; CHECK-NEXT: cmp lr, r12
|
||||
; CHECK-NEXT: csel r4, r2, r3, lt
|
||||
; CHECK-NEXT: cmp r2, r3
|
||||
; CHECK-NEXT: csel r2, r2, r3, lo
|
||||
; CHECK-NEXT: cmp lr, r12
|
||||
; CHECK-NEXT: csel r5, r2, r4, eq
|
||||
; CHECK-NEXT: csel r2, r3, r2, lt
|
||||
; CHECK-NEXT: csel r3, lr, r12, lt
|
||||
; CHECK-NEXT: csel r5, r4, r2, eq
|
||||
; CHECK-NEXT: movs r4, #0
|
||||
; CHECK-NEXT: subs r2, r5, r0
|
||||
; CHECK-NEXT: mov.w r4, #0
|
||||
; CHECK-NEXT: sbcs.w r2, r3, r1
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r4, #1
|
||||
|
@ -557,19 +551,16 @@ define arm_aapcs_vfpcc i64 @umaxv2i64(<2 x i64> %vec, i64 %max) {
|
|||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vmov r12, s3
|
||||
; CHECK-NEXT: vmov lr, s1
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: vmov r2, r12, d1
|
||||
; CHECK-NEXT: vmov r3, lr, d0
|
||||
; CHECK-NEXT: cmp r3, r2
|
||||
; CHECK-NEXT: csel r4, r3, r2, hi
|
||||
; CHECK-NEXT: cmp lr, r12
|
||||
; CHECK-NEXT: csel r4, r2, r3, hi
|
||||
; CHECK-NEXT: cmp r2, r3
|
||||
; CHECK-NEXT: csel r2, r2, r3, hi
|
||||
; CHECK-NEXT: cmp lr, r12
|
||||
; CHECK-NEXT: csel r5, r2, r4, eq
|
||||
; CHECK-NEXT: csel r2, r3, r2, hi
|
||||
; CHECK-NEXT: csel r3, lr, r12, hi
|
||||
; CHECK-NEXT: csel r5, r4, r2, eq
|
||||
; CHECK-NEXT: movs r4, #0
|
||||
; CHECK-NEXT: subs r2, r0, r5
|
||||
; CHECK-NEXT: mov.w r4, #0
|
||||
; CHECK-NEXT: sbcs.w r2, r1, r3
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r4, #1
|
||||
|
@ -588,19 +579,16 @@ define arm_aapcs_vfpcc i64 @smaxv2i64(<2 x i64> %vec, i64 %max) {
|
|||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vmov r12, s3
|
||||
; CHECK-NEXT: vmov lr, s1
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: vmov r2, r12, d1
|
||||
; CHECK-NEXT: vmov r3, lr, d0
|
||||
; CHECK-NEXT: cmp r3, r2
|
||||
; CHECK-NEXT: csel r4, r3, r2, hi
|
||||
; CHECK-NEXT: cmp lr, r12
|
||||
; CHECK-NEXT: csel r4, r2, r3, gt
|
||||
; CHECK-NEXT: cmp r2, r3
|
||||
; CHECK-NEXT: csel r2, r2, r3, hi
|
||||
; CHECK-NEXT: cmp lr, r12
|
||||
; CHECK-NEXT: csel r5, r2, r4, eq
|
||||
; CHECK-NEXT: csel r2, r3, r2, gt
|
||||
; CHECK-NEXT: csel r3, lr, r12, gt
|
||||
; CHECK-NEXT: csel r5, r4, r2, eq
|
||||
; CHECK-NEXT: movs r4, #0
|
||||
; CHECK-NEXT: subs r2, r0, r5
|
||||
; CHECK-NEXT: mov.w r4, #0
|
||||
; CHECK-NEXT: sbcs.w r2, r1, r3
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r4, #1
|
||||
|
|
|
@ -876,12 +876,11 @@ entry:
|
|||
define arm_aapcs_vfpcc <8 x i16> @vmovn32_badlanes(<4 x i32> %src1) {
|
||||
; CHECK-LABEL: vmovn32_badlanes:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: vmov r0, r1, d0
|
||||
; CHECK-NEXT: vmov.16 q1[1], r0
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: vmov.16 q1[3], r0
|
||||
; CHECK-NEXT: vmov.16 q1[5], r0
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: vmov.16 q1[3], r1
|
||||
; CHECK-NEXT: vmov.16 q1[5], r1
|
||||
; CHECK-NEXT: vmov.16 q1[7], r0
|
||||
; CHECK-NEXT: vmov q0, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
|
@ -889,12 +888,11 @@ define arm_aapcs_vfpcc <8 x i16> @vmovn32_badlanes(<4 x i32> %src1) {
|
|||
; CHECKBE-LABEL: vmovn32_badlanes:
|
||||
; CHECKBE: @ %bb.0: @ %entry
|
||||
; CHECKBE-NEXT: vrev64.32 q1, q0
|
||||
; CHECKBE-NEXT: vmov r0, s4
|
||||
; CHECKBE-NEXT: vmov r0, r1, d2
|
||||
; CHECKBE-NEXT: vmov.16 q2[1], r0
|
||||
; CHECKBE-NEXT: vmov r0, s5
|
||||
; CHECKBE-NEXT: vmov.16 q2[3], r0
|
||||
; CHECKBE-NEXT: vmov.16 q2[5], r0
|
||||
; CHECKBE-NEXT: vmov r0, s6
|
||||
; CHECKBE-NEXT: vmov.16 q2[3], r1
|
||||
; CHECKBE-NEXT: vmov.16 q2[5], r1
|
||||
; CHECKBE-NEXT: vmov.16 q2[7], r0
|
||||
; CHECKBE-NEXT: vrev64.16 q0, q2
|
||||
; CHECKBE-NEXT: bx lr
|
||||
|
|
|
@ -15,18 +15,14 @@ define arm_aapcs_vfpcc void @test32(i32* noalias nocapture readonly %x, i32* noa
|
|||
; CHECK-NEXT: subs r3, #4
|
||||
; CHECK-NEXT: vmullb.s32 q2, q1, q0
|
||||
; CHECK-NEXT: vmullt.s32 q3, q1, q0
|
||||
; CHECK-NEXT: vmov r5, s11
|
||||
; CHECK-NEXT: vmov r12, s10
|
||||
; CHECK-NEXT: vmov r12, r5, d5
|
||||
; CHECK-NEXT: lsrl r12, r5, #31
|
||||
; CHECK-NEXT: vmov r4, s8
|
||||
; CHECK-NEXT: vmov r5, s9
|
||||
; CHECK-NEXT: vmov r4, r5, d4
|
||||
; CHECK-NEXT: lsrl r4, r5, #31
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r4, r12
|
||||
; CHECK-NEXT: vmov r5, s15
|
||||
; CHECK-NEXT: vmov r12, s14
|
||||
; CHECK-NEXT: vmov r12, r5, d7
|
||||
; CHECK-NEXT: lsrl r12, r5, #31
|
||||
; CHECK-NEXT: vmov r4, s12
|
||||
; CHECK-NEXT: vmov r5, s13
|
||||
; CHECK-NEXT: vmov r4, r5, d6
|
||||
; CHECK-NEXT: lsrl r4, r5, #31
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r4, r12
|
||||
; CHECK-NEXT: vstrb.8 q2, [r2], #16
|
||||
|
|
|
@ -85,22 +85,20 @@ define arm_aapcs_vfpcc <8 x i16> @vqdmulh_i16_c(<8 x i16> %s0, <8 x i16> %s1) {
|
|||
; CHECK-NEXT: vmov.u16 r0, q1[3]
|
||||
; CHECK-NEXT: vmov.u16 r1, q1[1]
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r1, r0
|
||||
; CHECK-NEXT: vmov.u16 r1, q2[4]
|
||||
; CHECK-NEXT: vmullb.s16 q0, q3, q0
|
||||
; CHECK-NEXT: vmov.i32 q3, #0x7fff
|
||||
; CHECK-NEXT: vshl.i32 q0, q0, #10
|
||||
; CHECK-NEXT: vshr.s32 q0, q0, #10
|
||||
; CHECK-NEXT: vshr.s32 q0, q0, #15
|
||||
; CHECK-NEXT: vmin.s32 q4, q0, q3
|
||||
; CHECK-NEXT: vmov r0, s16
|
||||
; CHECK-NEXT: vmov r0, r1, d8
|
||||
; CHECK-NEXT: vmov.16 q0[0], r0
|
||||
; CHECK-NEXT: vmov r0, s17
|
||||
; CHECK-NEXT: vmov.16 q0[1], r0
|
||||
; CHECK-NEXT: vmov r0, s18
|
||||
; CHECK-NEXT: vmov.16 q0[1], r1
|
||||
; CHECK-NEXT: vmov r0, r1, d9
|
||||
; CHECK-NEXT: vmov.16 q0[2], r0
|
||||
; CHECK-NEXT: vmov r0, s19
|
||||
; CHECK-NEXT: vmov.16 q0[3], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q2[6]
|
||||
; CHECK-NEXT: vmov.16 q0[3], r1
|
||||
; CHECK-NEXT: vmov.u16 r1, q2[4]
|
||||
; CHECK-NEXT: vmov q4[2], q4[0], r1, r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q2[7]
|
||||
; CHECK-NEXT: vmov.u16 r1, q2[5]
|
||||
|
@ -116,14 +114,12 @@ define arm_aapcs_vfpcc <8 x i16> @vqdmulh_i16_c(<8 x i16> %s0, <8 x i16> %s1) {
|
|||
; CHECK-NEXT: vshr.s32 q1, q1, #10
|
||||
; CHECK-NEXT: vshr.s32 q1, q1, #15
|
||||
; CHECK-NEXT: vmin.s32 q1, q1, q3
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r0, r1, d2
|
||||
; CHECK-NEXT: vmov.16 q0[4], r0
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov.16 q0[5], r0
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov.16 q0[5], r1
|
||||
; CHECK-NEXT: vmov r0, r1, d3
|
||||
; CHECK-NEXT: vmov.16 q0[6], r0
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov.16 q0[7], r0
|
||||
; CHECK-NEXT: vmov.16 q0[7], r1
|
||||
; CHECK-NEXT: vpop {d8, d9}
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
|
|
|
@ -164,22 +164,20 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @vqmovni64_smaxmin(<2 x i64> %s0) {
|
||||
; CHECK-LABEL: vqmovni64_smaxmin:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r1, r2, d1
|
||||
; CHECK-NEXT: mvn r12, #-2147483648
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: movs r0, #0
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: subs.w r2, r2, r12
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: sbcs r1, r1, #0
|
||||
; CHECK-NEXT: subs.w r1, r1, r12
|
||||
; CHECK-NEXT: sbcs r1, r2, #0
|
||||
; CHECK-NEXT: vmov r2, r3, d0
|
||||
; CHECK-NEXT: mov.w r1, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r1, #1
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: subs.w r3, r3, r12
|
||||
; CHECK-NEXT: subs.w r2, r2, r12
|
||||
; CHECK-NEXT: mov.w r12, #-1
|
||||
; CHECK-NEXT: sbcs r2, r2, #0
|
||||
; CHECK-NEXT: sbcs r2, r3, #0
|
||||
; CHECK-NEXT: mov.w r2, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r2, #1
|
||||
|
@ -192,19 +190,17 @@ define arm_aapcs_vfpcc <2 x i64> @vqmovni64_smaxmin(<2 x i64> %s0) {
|
|||
; CHECK-NEXT: vand q0, q0, q1
|
||||
; CHECK-NEXT: vbic q2, q2, q1
|
||||
; CHECK-NEXT: vorr q0, q0, q2
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: rsbs.w r2, r2, #-2147483648
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: sbcs.w r1, r12, r1
|
||||
; CHECK-NEXT: vmov r1, r2, d1
|
||||
; CHECK-NEXT: rsbs.w r1, r1, #-2147483648
|
||||
; CHECK-NEXT: sbcs.w r1, r12, r2
|
||||
; CHECK-NEXT: vmov r2, r3, d0
|
||||
; CHECK-NEXT: mov.w r1, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r1, #1
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: rsbs.w r3, r3, #-2147483648
|
||||
; CHECK-NEXT: sbcs.w r2, r12, r2
|
||||
; CHECK-NEXT: rsbs.w r2, r2, #-2147483648
|
||||
; CHECK-NEXT: sbcs.w r2, r12, r3
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r0, #1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
|
@ -240,21 +236,19 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @vqmovni64_sminmax(<2 x i64> %s0) {
|
||||
; CHECK-LABEL: vqmovni64_sminmax:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r1, r2, d1
|
||||
; CHECK-NEXT: mov.w r12, #-1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: movs r0, #0
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: rsbs.w r2, r2, #-2147483648
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: sbcs.w r1, r12, r1
|
||||
; CHECK-NEXT: rsbs.w r1, r1, #-2147483648
|
||||
; CHECK-NEXT: sbcs.w r1, r12, r2
|
||||
; CHECK-NEXT: vmov r2, r3, d0
|
||||
; CHECK-NEXT: mov.w r1, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r1, #1
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: rsbs.w r3, r3, #-2147483648
|
||||
; CHECK-NEXT: sbcs.w r2, r12, r2
|
||||
; CHECK-NEXT: rsbs.w r2, r2, #-2147483648
|
||||
; CHECK-NEXT: sbcs.w r2, r12, r3
|
||||
; CHECK-NEXT: mvn r12, #-2147483648
|
||||
; CHECK-NEXT: mov.w r2, #0
|
||||
; CHECK-NEXT: it lt
|
||||
|
@ -268,19 +262,17 @@ define arm_aapcs_vfpcc <2 x i64> @vqmovni64_sminmax(<2 x i64> %s0) {
|
|||
; CHECK-NEXT: vand q0, q0, q1
|
||||
; CHECK-NEXT: vbic q2, q2, q1
|
||||
; CHECK-NEXT: vorr q0, q0, q2
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: subs.w r2, r2, r12
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: sbcs r1, r1, #0
|
||||
; CHECK-NEXT: vmov r1, r2, d1
|
||||
; CHECK-NEXT: subs.w r1, r1, r12
|
||||
; CHECK-NEXT: sbcs r1, r2, #0
|
||||
; CHECK-NEXT: vmov r2, r3, d0
|
||||
; CHECK-NEXT: mov.w r1, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r1, #1
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: subs.w r3, r3, r12
|
||||
; CHECK-NEXT: sbcs r2, r2, #0
|
||||
; CHECK-NEXT: subs.w r2, r2, r12
|
||||
; CHECK-NEXT: sbcs r2, r3, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r0, #1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
|
@ -316,21 +308,19 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @vqmovni64_umaxmin(<2 x i64> %s0) {
|
||||
; CHECK-LABEL: vqmovni64_umaxmin:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: movs r2, #0
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov.i64 q2, #0xffffffff
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: subs.w r1, r1, #-1
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: sbcs r0, r0, #0
|
||||
; CHECK-NEXT: subs.w r0, r0, #-1
|
||||
; CHECK-NEXT: sbcs r0, r1, #0
|
||||
; CHECK-NEXT: vmov r1, r3, d0
|
||||
; CHECK-NEXT: mov.w r0, #0
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r0, #1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: subs.w r3, r3, #-1
|
||||
; CHECK-NEXT: sbcs r1, r1, #0
|
||||
; CHECK-NEXT: subs.w r1, r1, #-1
|
||||
; CHECK-NEXT: sbcs r1, r3, #0
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r2, #1
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
|
@ -350,21 +340,19 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @vqmovni64_uminmax(<2 x i64> %s0) {
|
||||
; CHECK-LABEL: vqmovni64_uminmax:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: movs r2, #0
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov.i64 q2, #0xffffffff
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: subs.w r1, r1, #-1
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: sbcs r0, r0, #0
|
||||
; CHECK-NEXT: subs.w r0, r0, #-1
|
||||
; CHECK-NEXT: sbcs r0, r1, #0
|
||||
; CHECK-NEXT: vmov r1, r3, d0
|
||||
; CHECK-NEXT: mov.w r0, #0
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r0, #1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: subs.w r3, r3, #-1
|
||||
; CHECK-NEXT: sbcs r1, r1, #0
|
||||
; CHECK-NEXT: subs.w r1, r1, #-1
|
||||
; CHECK-NEXT: sbcs r1, r3, #0
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r2, #1
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
|
|
|
@ -180,56 +180,52 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @vqshrni64_smaxmin(<2 x i64> %so) {
|
||||
; CHECK-LABEL: vqshrni64_smaxmin:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: mvn r12, #-2147483648
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: mov.w lr, #0
|
||||
; CHECK-NEXT: asrl r2, r1, #3
|
||||
; CHECK-NEXT: vmov r4, s0
|
||||
; CHECK-NEXT: subs.w r3, r2, r12
|
||||
; CHECK-NEXT: sbcs r3, r1, #0
|
||||
; CHECK-NEXT: mov.w r3, #0
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: vmov r2, r3, d1
|
||||
; CHECK-NEXT: mvn lr, #-2147483648
|
||||
; CHECK-NEXT: vmov r0, r1, d0
|
||||
; CHECK-NEXT: asrl r2, r3, #3
|
||||
; CHECK-NEXT: asrl r0, r1, #3
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, r2
|
||||
; CHECK-NEXT: subs.w r2, r2, lr
|
||||
; CHECK-NEXT: sbcs r2, r3, #0
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r3
|
||||
; CHECK-NEXT: mov.w r2, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r3, #1
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: csetm r5, ne
|
||||
; CHECK-NEXT: asrl r4, r3, #3
|
||||
; CHECK-NEXT: subs.w r0, r4, r12
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r4, r2
|
||||
; CHECK-NEXT: sbcs r0, r3, #0
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r3, r1
|
||||
; CHECK-NEXT: movlt r2, #1
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: csetm r2, ne
|
||||
; CHECK-NEXT: subs.w r0, r0, lr
|
||||
; CHECK-NEXT: sbcs r0, r1, #0
|
||||
; CHECK-NEXT: mov.w r0, #0
|
||||
; CHECK-NEXT: mov.w r2, #-1
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r0, #1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, r5
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r0, r5
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r0, r2
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r0, r2
|
||||
; CHECK-NEXT: adr r0, .LCPI12_0
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
||||
; CHECK-NEXT: vbic q1, q1, q0
|
||||
; CHECK-NEXT: vand q0, q2, q0
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0]
|
||||
; CHECK-NEXT: vand q0, q0, q1
|
||||
; CHECK-NEXT: mov.w r2, #-1
|
||||
; CHECK-NEXT: vbic q1, q2, q1
|
||||
; CHECK-NEXT: vorr q0, q0, q1
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: rsbs.w r1, r1, #-2147483648
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: sbcs.w r0, r2, r0
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: rsbs.w r0, r0, #-2147483648
|
||||
; CHECK-NEXT: sbcs.w r0, r2, r1
|
||||
; CHECK-NEXT: vmov r1, r3, d0
|
||||
; CHECK-NEXT: mov.w r0, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r0, #1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: rsbs.w r3, r3, #-2147483648
|
||||
; CHECK-NEXT: sbcs.w r1, r2, r1
|
||||
; CHECK-NEXT: rsbs.w r1, r1, #-2147483648
|
||||
; CHECK-NEXT: sbcs.w r1, r2, r3
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt.w lr, #1
|
||||
; CHECK-NEXT: cmp.w lr, #0
|
||||
; CHECK-NEXT: movlt.w r12, #1
|
||||
; CHECK-NEXT: cmp.w r12, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
|
||||
|
@ -238,7 +234,7 @@ define arm_aapcs_vfpcc <2 x i64> @vqshrni64_smaxmin(<2 x i64> %so) {
|
|||
; CHECK-NEXT: vand q0, q0, q1
|
||||
; CHECK-NEXT: vbic q2, q2, q1
|
||||
; CHECK-NEXT: vorr q0, q0, q2
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
; CHECK-NEXT: .p2align 4
|
||||
; CHECK-NEXT: @ %bb.1:
|
||||
; CHECK-NEXT: .LCPI12_0:
|
||||
|
@ -265,19 +261,17 @@ define arm_aapcs_vfpcc <2 x i64> @vqshrni64_sminmax(<2 x i64> %so) {
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r2, r1, d1
|
||||
; CHECK-NEXT: mov.w r12, #-1
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: mov.w lr, #0
|
||||
; CHECK-NEXT: asrl r2, r1, #3
|
||||
; CHECK-NEXT: vmov r4, s0
|
||||
; CHECK-NEXT: mov.w lr, #0
|
||||
; CHECK-NEXT: rsbs.w r3, r2, #-2147483648
|
||||
; CHECK-NEXT: sbcs.w r3, r12, r1
|
||||
; CHECK-NEXT: mov.w r3, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r3, #1
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: vmov r4, r3, d0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: asrl r4, r3, #3
|
||||
; CHECK-NEXT: rsbs.w r5, r4, #-2147483648
|
||||
|
@ -297,19 +291,17 @@ define arm_aapcs_vfpcc <2 x i64> @vqshrni64_sminmax(<2 x i64> %so) {
|
|||
; CHECK-NEXT: vbic q1, q1, q0
|
||||
; CHECK-NEXT: vand q0, q2, q0
|
||||
; CHECK-NEXT: vorr q0, q0, q1
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: subs r1, r1, r2
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: sbcs r0, r0, #0
|
||||
; CHECK-NEXT: vmov r0, r1, d1
|
||||
; CHECK-NEXT: subs r0, r0, r2
|
||||
; CHECK-NEXT: sbcs r0, r1, #0
|
||||
; CHECK-NEXT: vmov r1, r3, d0
|
||||
; CHECK-NEXT: mov.w r0, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r0, #1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: subs r2, r3, r2
|
||||
; CHECK-NEXT: sbcs r1, r1, #0
|
||||
; CHECK-NEXT: subs r1, r1, r2
|
||||
; CHECK-NEXT: sbcs r1, r3, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt.w lr, #1
|
||||
; CHECK-NEXT: cmp.w lr, #0
|
||||
|
@ -346,37 +338,33 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @vqshrni64_umaxmin(<2 x i64> %so) {
|
||||
; CHECK-LABEL: vqshrni64_umaxmin:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vmov r5, s3
|
||||
; CHECK-NEXT: movs r2, #0
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: vmov.i64 q1, #0xffffffff
|
||||
; CHECK-NEXT: lsrl r0, r5, #3
|
||||
; CHECK-NEXT: vmov r4, s0
|
||||
; CHECK-NEXT: subs.w r3, r0, #-1
|
||||
; CHECK-NEXT: sbcs r3, r5, #0
|
||||
; CHECK-NEXT: mov.w r3, #0
|
||||
; CHECK-NEXT: vmov r0, r3, d1
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: vmov r2, r1, d0
|
||||
; CHECK-NEXT: lsrl r0, r3, #3
|
||||
; CHECK-NEXT: lsrl r2, r1, #3
|
||||
; CHECK-NEXT: vmov.i64 q2, #0xffffffff
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, r0
|
||||
; CHECK-NEXT: subs.w r0, r0, #-1
|
||||
; CHECK-NEXT: sbcs r0, r3, #0
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r3
|
||||
; CHECK-NEXT: mov.w r0, #0
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r3, #1
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: csetm r12, ne
|
||||
; CHECK-NEXT: lsrl r4, r3, #3
|
||||
; CHECK-NEXT: subs.w r1, r4, #-1
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r4, r0
|
||||
; CHECK-NEXT: sbcs r1, r3, #0
|
||||
; CHECK-NEXT: movlo r0, #1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: subs.w r2, r2, #-1
|
||||
; CHECK-NEXT: sbcs r1, r1, #0
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r2, #1
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r3, r5
|
||||
; CHECK-NEXT: movlo.w r12, #1
|
||||
; CHECK-NEXT: cmp.w r12, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r1, r12
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r12
|
||||
; CHECK-NEXT: vbic q1, q1, q0
|
||||
; CHECK-NEXT: vand q0, q2, q0
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
|
||||
; CHECK-NEXT: vand q0, q0, q1
|
||||
; CHECK-NEXT: vbic q1, q2, q1
|
||||
; CHECK-NEXT: vorr q0, q0, q1
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%s0 = lshr <2 x i64> %so, <i64 3, i64 3>
|
||||
%c1 = icmp ult <2 x i64> %s0, <i64 4294967295, i64 4294967295>
|
||||
|
@ -387,37 +375,33 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @vqshrni64_uminmax(<2 x i64> %so) {
|
||||
; CHECK-LABEL: vqshrni64_uminmax:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vmov r5, s3
|
||||
; CHECK-NEXT: movs r2, #0
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: vmov.i64 q1, #0xffffffff
|
||||
; CHECK-NEXT: lsrl r0, r5, #3
|
||||
; CHECK-NEXT: vmov r4, s0
|
||||
; CHECK-NEXT: subs.w r3, r0, #-1
|
||||
; CHECK-NEXT: sbcs r3, r5, #0
|
||||
; CHECK-NEXT: mov.w r3, #0
|
||||
; CHECK-NEXT: vmov r0, r3, d1
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: vmov r2, r1, d0
|
||||
; CHECK-NEXT: lsrl r0, r3, #3
|
||||
; CHECK-NEXT: lsrl r2, r1, #3
|
||||
; CHECK-NEXT: vmov.i64 q2, #0xffffffff
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, r0
|
||||
; CHECK-NEXT: subs.w r0, r0, #-1
|
||||
; CHECK-NEXT: sbcs r0, r3, #0
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r3
|
||||
; CHECK-NEXT: mov.w r0, #0
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r3, #1
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: csetm r12, ne
|
||||
; CHECK-NEXT: lsrl r4, r3, #3
|
||||
; CHECK-NEXT: subs.w r1, r4, #-1
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r4, r0
|
||||
; CHECK-NEXT: sbcs r1, r3, #0
|
||||
; CHECK-NEXT: movlo r0, #1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: subs.w r2, r2, #-1
|
||||
; CHECK-NEXT: sbcs r1, r1, #0
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r2, #1
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r3, r5
|
||||
; CHECK-NEXT: movlo.w r12, #1
|
||||
; CHECK-NEXT: cmp.w r12, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r1, r12
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r12
|
||||
; CHECK-NEXT: vbic q1, q1, q0
|
||||
; CHECK-NEXT: vand q0, q2, q0
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
|
||||
; CHECK-NEXT: vand q0, q0, q1
|
||||
; CHECK-NEXT: vbic q1, q2, q1
|
||||
; CHECK-NEXT: vorr q0, q0, q1
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%s0 = lshr <2 x i64> %so, <i64 3, i64 3>
|
||||
%c2 = icmp ult <2 x i64> %s0, <i64 4294967295, i64 4294967295>
|
||||
|
|
|
@ -520,9 +520,8 @@ define void @vst2_v2f16(<2 x half> *%src, <4 x half> *%dst) {
|
|||
; CHECK-NEXT: vins.f16 s4, s0
|
||||
; CHECK-NEXT: vmovx.f16 s0, s0
|
||||
; CHECK-NEXT: vins.f16 s5, s0
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: str r0, [r1, #4]
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r0, r2, d2
|
||||
; CHECK-NEXT: str r2, [r1, #4]
|
||||
; CHECK-NEXT: str r0, [r1]
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
|
|
|
@ -301,35 +301,31 @@ entry:
|
|||
define void @vst3_v4i16(<4 x i16> *%src, <12 x i16> *%dst) {
|
||||
; CHECK-LABEL: vst3_v4i16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vldrh.u32 q1, [r0]
|
||||
; CHECK-NEXT: vldrh.u32 q3, [r0, #8]
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vldrh.u32 q2, [r0, #16]
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov.16 q4[0], r0
|
||||
; CHECK-NEXT: vmov r0, s12
|
||||
; CHECK-NEXT: vmov.16 q4[1], r0
|
||||
; CHECK-NEXT: vmov r0, s8
|
||||
; CHECK-NEXT: vmov.16 q4[2], r0
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov.16 q4[3], r0
|
||||
; CHECK-NEXT: vmov r0, s13
|
||||
; CHECK-NEXT: vmov.f64 d0, d5
|
||||
; CHECK-NEXT: vmov.16 q4[4], r0
|
||||
; CHECK-NEXT: vmov r0, s9
|
||||
; CHECK-NEXT: vmov.f32 s1, s7
|
||||
; CHECK-NEXT: vmov.16 q4[5], r0
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov.f32 s3, s11
|
||||
; CHECK-NEXT: vmov.16 q4[6], r0
|
||||
; CHECK-NEXT: vmov r0, s14
|
||||
; CHECK-NEXT: vmov.f32 s2, s15
|
||||
; CHECK-NEXT: vmov.16 q4[7], r0
|
||||
; CHECK-NEXT: vstrh.32 q0, [r1, #16]
|
||||
; CHECK-NEXT: vstrw.32 q4, [r1]
|
||||
; CHECK-NEXT: vpop {d8, d9}
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: vldrh.u32 q1, [r0]
|
||||
; CHECK-NEXT: vldrh.u32 q0, [r0, #8]
|
||||
; CHECK-NEXT: vmov.f64 d6, d5
|
||||
; CHECK-NEXT: vmov.f32 s13, s7
|
||||
; CHECK-NEXT: vmov r0, r5, d2
|
||||
; CHECK-NEXT: vmov r2, r3, d0
|
||||
; CHECK-NEXT: vmov lr, r4, d1
|
||||
; CHECK-NEXT: vmov.16 q0[0], r0
|
||||
; CHECK-NEXT: vmov.f32 s15, s11
|
||||
; CHECK-NEXT: vmov.16 q0[1], r2
|
||||
; CHECK-NEXT: vmov.32 q3[2], r4
|
||||
; CHECK-NEXT: vmov r0, r4, d4
|
||||
; CHECK-NEXT: vmov.16 q0[2], r0
|
||||
; CHECK-NEXT: vmov r12, s6
|
||||
; CHECK-NEXT: vmov.16 q0[3], r5
|
||||
; CHECK-NEXT: vstrh.32 q3, [r1, #16]
|
||||
; CHECK-NEXT: vmov.16 q0[4], r3
|
||||
; CHECK-NEXT: vmov.16 q0[5], r4
|
||||
; CHECK-NEXT: vmov.16 q0[6], r12
|
||||
; CHECK-NEXT: vmov.16 q0[7], lr
|
||||
; CHECK-NEXT: vstrw.32 q0, [r1]
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
entry:
|
||||
%s1 = getelementptr <4 x i16>, <4 x i16>* %src, i32 0
|
||||
%l1 = load <4 x i16>, <4 x i16>* %s1, align 4
|
||||
|
@ -621,40 +617,33 @@ entry:
|
|||
define void @vst3_v4i8(<4 x i8> *%src, <12 x i8> *%dst) {
|
||||
; CHECK-LABEL: vst3_v4i8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: .save {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: vldrb.u32 q0, [r0, #4]
|
||||
; CHECK-NEXT: vldrb.u32 q1, [r0]
|
||||
; CHECK-NEXT: vldrb.u32 q2, [r0, #4]
|
||||
; CHECK-NEXT: vldrb.u32 q3, [r0, #8]
|
||||
; CHECK-NEXT: vmov r2, s4
|
||||
; CHECK-NEXT: vmov.16 q0[0], r2
|
||||
; CHECK-NEXT: vmov r2, s8
|
||||
; CHECK-NEXT: vmov.16 q0[1], r2
|
||||
; CHECK-NEXT: vmov r0, s12
|
||||
; CHECK-NEXT: vmov.16 q0[2], r0
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov.16 q0[3], r0
|
||||
; CHECK-NEXT: vmov r0, s9
|
||||
; CHECK-NEXT: vmov.16 q0[4], r0
|
||||
; CHECK-NEXT: vmov r0, s13
|
||||
; CHECK-NEXT: vmov.16 q0[5], r0
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov.16 q0[6], r0
|
||||
; CHECK-NEXT: vmov r0, s10
|
||||
; CHECK-NEXT: vmov.16 q0[7], r0
|
||||
; CHECK-NEXT: vmov r0, s14
|
||||
; CHECK-NEXT: vmov.8 q4[8], r0
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov.8 q4[9], r0
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: vmov.8 q4[10], r0
|
||||
; CHECK-NEXT: vmov r0, s15
|
||||
; CHECK-NEXT: vmov.8 q4[11], r0
|
||||
; CHECK-NEXT: vstrb.16 q0, [r1]
|
||||
; CHECK-NEXT: vmov r0, s18
|
||||
; CHECK-NEXT: str r0, [r1, #8]
|
||||
; CHECK-NEXT: vpop {d8, d9}
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: vmov r2, lr, d0
|
||||
; CHECK-NEXT: vmov r12, r3, d1
|
||||
; CHECK-NEXT: vldrb.u32 q0, [r0, #8]
|
||||
; CHECK-NEXT: vmov r0, r6, d3
|
||||
; CHECK-NEXT: vmov r4, r5, d1
|
||||
; CHECK-NEXT: vmov.8 q2[8], r4
|
||||
; CHECK-NEXT: vmov.8 q2[9], r6
|
||||
; CHECK-NEXT: vmov.8 q2[10], r3
|
||||
; CHECK-NEXT: vmov.8 q2[11], r5
|
||||
; CHECK-NEXT: vmov r3, s10
|
||||
; CHECK-NEXT: str r3, [r1, #8]
|
||||
; CHECK-NEXT: vmov r3, r4, d2
|
||||
; CHECK-NEXT: vmov.16 q1[0], r3
|
||||
; CHECK-NEXT: vmov r3, r5, d0
|
||||
; CHECK-NEXT: vmov.16 q1[1], r2
|
||||
; CHECK-NEXT: vmov.16 q1[2], r3
|
||||
; CHECK-NEXT: vmov.16 q1[3], r4
|
||||
; CHECK-NEXT: vmov.16 q1[4], lr
|
||||
; CHECK-NEXT: vmov.16 q1[5], r5
|
||||
; CHECK-NEXT: vmov.16 q1[6], r0
|
||||
; CHECK-NEXT: vmov.16 q1[7], r12
|
||||
; CHECK-NEXT: vstrb.16 q1, [r1]
|
||||
; CHECK-NEXT: pop {r4, r5, r6, pc}
|
||||
entry:
|
||||
%s1 = getelementptr <4 x i8>, <4 x i8>* %src, i32 0
|
||||
%l1 = load <4 x i8>, <4 x i8>* %s1, align 4
|
||||
|
@ -1313,11 +1302,9 @@ define void @vst3_v2f16(<2 x half> *%src, <6 x half> *%dst) {
|
|||
; CHECK-NEXT: vins.f16 s4, s8
|
||||
; CHECK-NEXT: vins.f16 s2, s10
|
||||
; CHECK-NEXT: vmov.f32 s1, s4
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: str r0, [r1, #8]
|
||||
; CHECK-NEXT: strd r3, r2, [r1]
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: vmov r0, r2, d0
|
||||
; CHECK-NEXT: stm r1!, {r0, r2, r3}
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%s1 = getelementptr <2 x half>, <2 x half>* %src, i32 0
|
||||
|
@ -1365,11 +1352,10 @@ define void @vst3_v4f16(<4 x half> *%src, <12 x half> *%dst) {
|
|||
; CHECK-NEXT: vins.f16 s5, s10
|
||||
; CHECK-NEXT: vins.f16 s17, s12
|
||||
; CHECK-NEXT: vmov.f32 s16, s5
|
||||
; CHECK-NEXT: vmov r2, s17
|
||||
; CHECK-NEXT: vmov.f32 s1, s4
|
||||
; CHECK-NEXT: vmov.f32 s3, s8
|
||||
; CHECK-NEXT: vstrw.32 q0, [r1]
|
||||
; CHECK-NEXT: vmov r0, s16
|
||||
; CHECK-NEXT: vmov r0, r2, d8
|
||||
; CHECK-NEXT: strd r0, r2, [r1, #16]
|
||||
; CHECK-NEXT: vpop {d8, d9}
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
|
|
|
@ -203,40 +203,40 @@ entry:
|
|||
define void @vst4_v4i32_align1(<4 x i32> *%src, <16 x i32> *%dst) {
|
||||
; CHECK-LABEL: vst4_v4i32_align1:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0]
|
||||
; CHECK-NEXT: vldrw.u32 q3, [r0, #32]
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0, #32]
|
||||
; CHECK-NEXT: vldrw.u32 q3, [r0]
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0, #16]
|
||||
; CHECK-NEXT: vmov.f32 s0, s9
|
||||
; CHECK-NEXT: vmov r0, s13
|
||||
; CHECK-NEXT: vdup.32 q4, r0
|
||||
; CHECK-NEXT: vmov.f32 s1, s5
|
||||
; CHECK-NEXT: vmov.f32 s2, s18
|
||||
; CHECK-NEXT: vmov r0, s12
|
||||
; CHECK-NEXT: vmov.f32 s3, s19
|
||||
; CHECK-NEXT: vdup.32 q4, r0
|
||||
; CHECK-NEXT: vmov.f32 s9, s4
|
||||
; CHECK-NEXT: vstrb.8 q0, [r1, #16]
|
||||
; CHECK-NEXT: vmov r0, s15
|
||||
; CHECK-NEXT: vmov.f32 s16, s8
|
||||
; CHECK-NEXT: vdup.32 q6, r0
|
||||
; CHECK-NEXT: vmov.f32 s20, s11
|
||||
; CHECK-NEXT: vmov.f32 s8, s10
|
||||
; CHECK-NEXT: vmov r0, s14
|
||||
; CHECK-NEXT: vmov.f32 s21, s7
|
||||
; CHECK-NEXT: vmov.f32 s17, s4
|
||||
; CHECK-NEXT: vmov r2, r3, d1
|
||||
; CHECK-NEXT: vmov r12, lr, d0
|
||||
; CHECK-NEXT: vdup.32 q4, r3
|
||||
; CHECK-NEXT: vmov.f64 d0, d6
|
||||
; CHECK-NEXT: vmov.f32 s1, s4
|
||||
; CHECK-NEXT: vmov.f32 s4, s13
|
||||
; CHECK-NEXT: vmov.f64 d4, d7
|
||||
; CHECK-NEXT: vmov.f32 s12, s15
|
||||
; CHECK-NEXT: vmov.f32 s13, s7
|
||||
; CHECK-NEXT: vmov.f32 s14, s18
|
||||
; CHECK-NEXT: vmov.f32 s15, s19
|
||||
; CHECK-NEXT: vstrb.8 q3, [r1, #48]
|
||||
; CHECK-NEXT: vmov.f32 s9, s6
|
||||
; CHECK-NEXT: vdup.32 q1, r0
|
||||
; CHECK-NEXT: vmov.f32 s22, s26
|
||||
; CHECK-NEXT: vstrb.8 q4, [r1]
|
||||
; CHECK-NEXT: vmov.f32 s10, s6
|
||||
; CHECK-NEXT: vmov.f32 s23, s27
|
||||
; CHECK-NEXT: vmov.f32 s11, s7
|
||||
; CHECK-NEXT: vstrb.8 q5, [r1, #48]
|
||||
; CHECK-NEXT: vdup.32 q3, r2
|
||||
; CHECK-NEXT: vmov.f32 s10, s14
|
||||
; CHECK-NEXT: vmov.f32 s11, s15
|
||||
; CHECK-NEXT: vstrb.8 q2, [r1, #32]
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: vdup.32 q2, lr
|
||||
; CHECK-NEXT: vmov.f32 s6, s10
|
||||
; CHECK-NEXT: vmov.f32 s7, s11
|
||||
; CHECK-NEXT: vstrb.8 q1, [r1, #16]
|
||||
; CHECK-NEXT: vdup.32 q1, r12
|
||||
; CHECK-NEXT: vmov.f32 s2, s6
|
||||
; CHECK-NEXT: vmov.f32 s3, s7
|
||||
; CHECK-NEXT: vstrb.8 q0, [r1]
|
||||
; CHECK-NEXT: vpop {d8, d9}
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
entry:
|
||||
%s1 = getelementptr <4 x i32>, <4 x i32>* %src, i32 0
|
||||
%l1 = load <4 x i32>, <4 x i32>* %s1, align 4
|
||||
|
@ -297,43 +297,36 @@ entry:
|
|||
define void @vst4_v4i16(<4 x i16> *%src, <16 x i16> *%dst) {
|
||||
; CHECK-LABEL: vst4_v4i16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vldrh.u32 q1, [r0]
|
||||
; CHECK-NEXT: vldrh.u32 q2, [r0, #8]
|
||||
; CHECK-NEXT: vldrh.u32 q3, [r0, #16]
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: vmov.16 q0[0], r2
|
||||
; CHECK-NEXT: vmov r2, s10
|
||||
; CHECK-NEXT: vmov.16 q0[1], r2
|
||||
; CHECK-NEXT: vmov r0, s14
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vldrh.u32 q0, [r0, #8]
|
||||
; CHECK-NEXT: vldrh.u32 q2, [r0, #16]
|
||||
; CHECK-NEXT: vmov lr, r12, d0
|
||||
; CHECK-NEXT: vmov r2, r3, d1
|
||||
; CHECK-NEXT: vldrh.u32 q0, [r0]
|
||||
; CHECK-NEXT: vmov r4, r5, d1
|
||||
; CHECK-NEXT: vmov.16 q1[0], r4
|
||||
; CHECK-NEXT: vmov r0, r4, d5
|
||||
; CHECK-NEXT: vmov.16 q1[1], r2
|
||||
; CHECK-NEXT: vmov.16 q1[2], r0
|
||||
; CHECK-NEXT: vmov.16 q1[3], r0
|
||||
; CHECK-NEXT: vmov r0, r2, d4
|
||||
; CHECK-NEXT: vmov.16 q1[4], r5
|
||||
; CHECK-NEXT: vmov.16 q1[5], r3
|
||||
; CHECK-NEXT: vmov r3, r5, d0
|
||||
; CHECK-NEXT: vmov.16 q0[0], r3
|
||||
; CHECK-NEXT: vmov.16 q1[6], r4
|
||||
; CHECK-NEXT: vmov.16 q0[1], lr
|
||||
; CHECK-NEXT: vmov.16 q1[7], r4
|
||||
; CHECK-NEXT: vmov.16 q0[2], r0
|
||||
; CHECK-NEXT: vstrh.16 q1, [r1, #16]
|
||||
; CHECK-NEXT: vmov.16 q0[3], r0
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov.16 q0[4], r0
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: vmov.16 q0[5], r0
|
||||
; CHECK-NEXT: vmov r0, s15
|
||||
; CHECK-NEXT: vmov.16 q0[6], r0
|
||||
; CHECK-NEXT: vmov.16 q0[7], r0
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov.16 q4[0], r0
|
||||
; CHECK-NEXT: vmov r0, s8
|
||||
; CHECK-NEXT: vmov.16 q4[1], r0
|
||||
; CHECK-NEXT: vmov r0, s12
|
||||
; CHECK-NEXT: vmov.16 q4[2], r0
|
||||
; CHECK-NEXT: vstrh.16 q0, [r1, #16]
|
||||
; CHECK-NEXT: vmov.16 q4[3], r0
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov.16 q4[4], r0
|
||||
; CHECK-NEXT: vmov r0, s9
|
||||
; CHECK-NEXT: vmov.16 q4[5], r0
|
||||
; CHECK-NEXT: vmov r0, s13
|
||||
; CHECK-NEXT: vmov.16 q4[6], r0
|
||||
; CHECK-NEXT: vmov.16 q4[7], r0
|
||||
; CHECK-NEXT: vstrh.16 q4, [r1]
|
||||
; CHECK-NEXT: vpop {d8, d9}
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: vmov.16 q0[4], r5
|
||||
; CHECK-NEXT: vmov.16 q0[5], r12
|
||||
; CHECK-NEXT: vmov.16 q0[6], r2
|
||||
; CHECK-NEXT: vmov.16 q0[7], r2
|
||||
; CHECK-NEXT: vstrh.16 q0, [r1]
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
entry:
|
||||
%s1 = getelementptr <4 x i16>, <4 x i16>* %src, i32 0
|
||||
%l1 = load <4 x i16>, <4 x i16>* %s1, align 4
|
||||
|
@ -531,39 +524,35 @@ entry:
|
|||
define void @vst4_v4i8(<4 x i8> *%src, <16 x i8> *%dst) {
|
||||
; CHECK-LABEL: vst4_v4i8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vldrb.u32 q1, [r0]
|
||||
; CHECK-NEXT: vldrb.u32 q2, [r0, #4]
|
||||
; CHECK-NEXT: vldrb.u32 q3, [r0, #8]
|
||||
; CHECK-NEXT: vmov r2, s4
|
||||
; CHECK-NEXT: vmov.8 q0[0], r2
|
||||
; CHECK-NEXT: vmov r2, s8
|
||||
; CHECK-NEXT: vldrb.u32 q0, [r0, #4]
|
||||
; CHECK-NEXT: vldrb.u32 q2, [r0, #8]
|
||||
; CHECK-NEXT: vmov r4, r5, d2
|
||||
; CHECK-NEXT: vmov lr, r12, d1
|
||||
; CHECK-NEXT: vmov r2, r3, d0
|
||||
; CHECK-NEXT: vmov.8 q0[0], r4
|
||||
; CHECK-NEXT: vmov r0, r4, d4
|
||||
; CHECK-NEXT: vmov.8 q0[1], r2
|
||||
; CHECK-NEXT: vmov r0, s12
|
||||
; CHECK-NEXT: vmov.8 q0[2], r0
|
||||
; CHECK-NEXT: vmov.8 q0[3], r0
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov.8 q0[4], r0
|
||||
; CHECK-NEXT: vmov r0, s9
|
||||
; CHECK-NEXT: vmov.8 q0[5], r0
|
||||
; CHECK-NEXT: vmov r0, s13
|
||||
; CHECK-NEXT: vmov.8 q0[6], r0
|
||||
; CHECK-NEXT: vmov.8 q0[7], r0
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov.8 q0[8], r0
|
||||
; CHECK-NEXT: vmov r0, s10
|
||||
; CHECK-NEXT: vmov.8 q0[9], r0
|
||||
; CHECK-NEXT: vmov r0, s14
|
||||
; CHECK-NEXT: vmov r0, r2, d5
|
||||
; CHECK-NEXT: vmov.8 q0[4], r5
|
||||
; CHECK-NEXT: vmov.8 q0[5], r3
|
||||
; CHECK-NEXT: vmov r3, r5, d3
|
||||
; CHECK-NEXT: vmov.8 q0[6], r4
|
||||
; CHECK-NEXT: vmov.8 q0[7], r4
|
||||
; CHECK-NEXT: vmov.8 q0[8], r3
|
||||
; CHECK-NEXT: vmov.8 q0[9], lr
|
||||
; CHECK-NEXT: vmov.8 q0[10], r0
|
||||
; CHECK-NEXT: vmov.8 q0[11], r0
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov.8 q0[12], r0
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: vmov.8 q0[13], r0
|
||||
; CHECK-NEXT: vmov r0, s15
|
||||
; CHECK-NEXT: vmov.8 q0[14], r0
|
||||
; CHECK-NEXT: vmov.8 q0[15], r0
|
||||
; CHECK-NEXT: vmov.8 q0[12], r5
|
||||
; CHECK-NEXT: vmov.8 q0[13], r12
|
||||
; CHECK-NEXT: vmov.8 q0[14], r2
|
||||
; CHECK-NEXT: vmov.8 q0[15], r2
|
||||
; CHECK-NEXT: vstrb.8 q0, [r1]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
entry:
|
||||
%s1 = getelementptr <4 x i8>, <4 x i8>* %src, i32 0
|
||||
%l1 = load <4 x i8>, <4 x i8>* %s1, align 4
|
||||
|
@ -984,33 +973,40 @@ entry:
|
|||
define void @vst4_v4f32_align1(<4 x float> *%src, <16 x float> *%dst) {
|
||||
; CHECK-LABEL: vst4_v4f32_align1:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vldrw.u32 q3, [r0]
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0, #16]
|
||||
; CHECK-NEXT: vldrw.u32 q4, [r0]
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0, #32]
|
||||
; CHECK-NEXT: vmov.f32 s4, s13
|
||||
; CHECK-NEXT: vmov.f32 s13, s8
|
||||
; CHECK-NEXT: vmov.f32 s20, s15
|
||||
; CHECK-NEXT: vmov.f32 s5, s9
|
||||
; CHECK-NEXT: vmov.f32 s21, s11
|
||||
; CHECK-NEXT: vmov.f32 s6, s1
|
||||
; CHECK-NEXT: vmov.f32 s22, s3
|
||||
; CHECK-NEXT: vmov.f32 s7, s1
|
||||
; CHECK-NEXT: vmov.f64 d8, d6
|
||||
; CHECK-NEXT: vstrb.8 q1, [r1, #16]
|
||||
; CHECK-NEXT: vmov.f32 s17, s8
|
||||
; CHECK-NEXT: vmov.f32 s18, s0
|
||||
; CHECK-NEXT: vmov.f32 s19, s0
|
||||
; CHECK-NEXT: vmov.f32 s23, s3
|
||||
; CHECK-NEXT: vstrb.8 q4, [r1]
|
||||
; CHECK-NEXT: vmov.f32 s0, s14
|
||||
; CHECK-NEXT: vstrb.8 q5, [r1, #48]
|
||||
; CHECK-NEXT: vmov.f32 s1, s10
|
||||
; CHECK-NEXT: vmov.f32 s3, s2
|
||||
; CHECK-NEXT: vstrb.8 q0, [r1, #32]
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0, #16]
|
||||
; CHECK-NEXT: vmov.f64 d2, d8
|
||||
; CHECK-NEXT: vmov r2, r3, d1
|
||||
; CHECK-NEXT: vmov.f32 s5, s8
|
||||
; CHECK-NEXT: vdup.32 q5, r3
|
||||
; CHECK-NEXT: vmov.f32 s8, s17
|
||||
; CHECK-NEXT: vmov.f64 d6, d9
|
||||
; CHECK-NEXT: vmov.f32 s16, s19
|
||||
; CHECK-NEXT: vmov.f32 s17, s11
|
||||
; CHECK-NEXT: vmov.f32 s18, s22
|
||||
; CHECK-NEXT: vmov.f32 s19, s23
|
||||
; CHECK-NEXT: vstrb.8 q4, [r1, #48]
|
||||
; CHECK-NEXT: vmov.f32 s13, s10
|
||||
; CHECK-NEXT: vdup.32 q4, r2
|
||||
; CHECK-NEXT: vmov r12, lr, d0
|
||||
; CHECK-NEXT: vmov.f32 s14, s18
|
||||
; CHECK-NEXT: vmov.f32 s15, s2
|
||||
; CHECK-NEXT: vstrb.8 q3, [r1, #32]
|
||||
; CHECK-NEXT: vdup.32 q3, lr
|
||||
; CHECK-NEXT: vmov.f32 s10, s14
|
||||
; CHECK-NEXT: vmov.f32 s11, s15
|
||||
; CHECK-NEXT: vstrb.8 q2, [r1, #16]
|
||||
; CHECK-NEXT: vdup.32 q2, r12
|
||||
; CHECK-NEXT: vmov.f32 s6, s10
|
||||
; CHECK-NEXT: vmov.f32 s7, s0
|
||||
; CHECK-NEXT: vstrb.8 q1, [r1]
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
entry:
|
||||
%s1 = getelementptr <4 x float>, <4 x float>* %src, i32 0
|
||||
%l1 = load <4 x float>, <4 x float>* %s1, align 4
|
||||
|
|
|
@ -50,10 +50,8 @@ entry:
|
|||
define arm_aapcs_vfpcc <4 x double> @foo_v4i32(<4 x i32>* nocapture readonly %pSrc, i32 %blockSize, <4 x i32> %a) {
|
||||
; CHECK-LABEL: foo_v4i32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: .pad #4
|
||||
; CHECK-NEXT: sub sp, #4
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
|
||||
; CHECK-NEXT: vpt.s32 lt, q0, zr
|
||||
|
@ -62,36 +60,31 @@ define arm_aapcs_vfpcc <4 x double> @foo_v4i32(<4 x i32>* nocapture readonly %pS
|
|||
; CHECK-NEXT: vmov.i64 q5, #0xffffffff
|
||||
; CHECK-NEXT: vmov.f32 s2, s17
|
||||
; CHECK-NEXT: vand q6, q0, q5
|
||||
; CHECK-NEXT: vmov r0, s24
|
||||
; CHECK-NEXT: vmov r1, s25
|
||||
; CHECK-NEXT: bl __aeabi_ul2d
|
||||
; CHECK-NEXT: mov r4, r0
|
||||
; CHECK-NEXT: mov r5, r1
|
||||
; CHECK-NEXT: vmov r0, s26
|
||||
; CHECK-NEXT: vmov r1, s27
|
||||
; CHECK-NEXT: vmov r0, r1, d13
|
||||
; CHECK-NEXT: bl __aeabi_ul2d
|
||||
; CHECK-NEXT: vmov r2, r3, d12
|
||||
; CHECK-NEXT: vmov.f64 d0, d9
|
||||
; CHECK-NEXT: vmov.f32 s2, s19
|
||||
; CHECK-NEXT: vand q0, q0, q5
|
||||
; CHECK-NEXT: vmov d9, r0, r1
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: vmov r6, s0
|
||||
; CHECK-NEXT: vmov r7, s1
|
||||
; CHECK-NEXT: vmov d8, r4, r5
|
||||
; CHECK-NEXT: vand q5, q0, q5
|
||||
; CHECK-NEXT: vmov r4, r5, d11
|
||||
; CHECK-NEXT: mov r0, r2
|
||||
; CHECK-NEXT: mov r1, r3
|
||||
; CHECK-NEXT: bl __aeabi_ul2d
|
||||
; CHECK-NEXT: vmov d8, r0, r1
|
||||
; CHECK-NEXT: mov r0, r4
|
||||
; CHECK-NEXT: mov r1, r5
|
||||
; CHECK-NEXT: bl __aeabi_ul2d
|
||||
; CHECK-NEXT: vmov r2, r3, d10
|
||||
; CHECK-NEXT: vmov d11, r0, r1
|
||||
; CHECK-NEXT: mov r0, r6
|
||||
; CHECK-NEXT: mov r1, r7
|
||||
; CHECK-NEXT: mov r0, r2
|
||||
; CHECK-NEXT: mov r1, r3
|
||||
; CHECK-NEXT: bl __aeabi_ul2d
|
||||
; CHECK-NEXT: vmov d10, r0, r1
|
||||
; CHECK-NEXT: vmov q0, q4
|
||||
; CHECK-NEXT: vmov q1, q5
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
|
||||
; CHECK-NEXT: add sp, #4
|
||||
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
entry:
|
||||
%active.lane.mask = icmp slt <4 x i32> %a, zeroinitializer
|
||||
%wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %pSrc, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
|
||||
|
|
Loading…
Reference in New Issue