forked from OSchip/llvm-project
Revert "[ARM] Match dual lane vmovs from insert_vector_elt"
This one needed more testing.
This commit is contained in:
parent
fc712eb7aa
commit
6e913e4451
|
@ -4790,14 +4790,6 @@ bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
|
|||
}
|
||||
}
|
||||
}
|
||||
if (MI.getOpcode() == ARM::MVE_VMOV_q_rr) {
|
||||
assert(MI.getOperand(4).isImm() && MI.getOperand(5).isImm());
|
||||
if ((MI.getOperand(4).getImm() != 2 && MI.getOperand(4).getImm() != 3) ||
|
||||
MI.getOperand(4).getImm() != MI.getOperand(5).getImm() + 2) {
|
||||
ErrInfo = "Incorrect array index for MVE_VMOV_q_rr";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -5845,41 +5845,6 @@ def MVE_VMOV_rr_q : MVE_VMOV_64bit<(outs rGPR:$Rt, rGPR:$Rt2), (ins MQPR:$Qd),
|
|||
let AsmMatchConverter = "cvtMVEVMOVQtoDReg";
|
||||
}
|
||||
|
||||
let Predicates = [HasMVEInt] in {
|
||||
// Double lane moves. There are a number of patterns here. We know that the
|
||||
// insertelts will be in descending order by index, and need to match the 5
|
||||
// patterns that might contain 2-0 or 3-1 pairs. These are:
|
||||
// 3 2 1 0 -> vmovqrr 31; vmovqrr 20
|
||||
// 3 2 1 -> vmovqrr 31; vmov 2
|
||||
// 3 1 -> vmovqrr 31
|
||||
// 2 1 0 -> vmovqrr 20; vmov 1
|
||||
// 2 0 -> vmovqrr 20
|
||||
// The other potential patterns will be handled by single lane inserts.
|
||||
// In every MVE_VMOV_q_rr result below, the first GPR operand is paired with
// the first lane index and the second GPR operand with the second lane index.
// The nested (inner) instruction produces the vector consumed by the outer one.
//
// Lanes 3, 2, 1 and 0 all inserted: a dual-lane move for lanes 2/0 (inner),
// followed by a dual-lane move for lanes 3/1 (outer).
def : Pat<(insertelt (insertelt (insertelt (insertelt (v4i32 MQPR:$src1),
|
||||
rGPR:$srcA, (i32 0)),
|
||||
rGPR:$srcB, (i32 1)),
|
||||
rGPR:$srcC, (i32 2)),
|
||||
rGPR:$srcD, (i32 3)),
|
||||
(MVE_VMOV_q_rr (MVE_VMOV_q_rr MQPR:$src1, rGPR:$srcC, rGPR:$srcA, (i32 2), (i32 0)),
|
||||
rGPR:$srcD, rGPR:$srcB, (i32 3), (i32 1))>;
|
||||
// Lanes 3, 2 and 1 inserted: a single-lane insert into lane 2 (inner),
// followed by a dual-lane move for lanes 3/1 (outer).
def : Pat<(insertelt (insertelt (insertelt (v4i32 MQPR:$src1),
|
||||
rGPR:$srcB, (i32 1)),
|
||||
rGPR:$srcC, (i32 2)),
|
||||
rGPR:$srcD, (i32 3)),
|
||||
(MVE_VMOV_q_rr (MVE_VMOV_to_lane_32 MQPR:$src1, rGPR:$srcC, (i32 2)),
|
||||
rGPR:$srcD, rGPR:$srcB, (i32 3), (i32 1))>;
|
||||
// Lanes 3 and 1 inserted: one dual-lane move for lanes 3/1.
def : Pat<(insertelt (insertelt (v4i32 MQPR:$src1), rGPR:$srcA, (i32 1)), rGPR:$srcB, (i32 3)),
|
||||
(MVE_VMOV_q_rr MQPR:$src1, rGPR:$srcB, rGPR:$srcA, (i32 3), (i32 1))>;
|
||||
// Lanes 2, 1 and 0 inserted: a single-lane insert into lane 1 (inner),
// followed by a dual-lane move for lanes 2/0 (outer).
def : Pat<(insertelt (insertelt (insertelt (v4i32 MQPR:$src1),
|
||||
rGPR:$srcB, (i32 0)),
|
||||
rGPR:$srcC, (i32 1)),
|
||||
rGPR:$srcD, (i32 2)),
|
||||
(MVE_VMOV_q_rr (MVE_VMOV_to_lane_32 MQPR:$src1, rGPR:$srcC, (i32 1)),
|
||||
rGPR:$srcD, rGPR:$srcB, (i32 2), (i32 0))>;
|
||||
// Lanes 2 and 0 inserted: one dual-lane move for lanes 2/0.
def : Pat<(insertelt (insertelt (v4i32 MQPR:$src1), rGPR:$srcA, (i32 0)), rGPR:$srcB, (i32 2)),
|
||||
(MVE_VMOV_q_rr MQPR:$src1, rGPR:$srcB, rGPR:$srcA, (i32 2), (i32 0))>;
|
||||
}
|
||||
|
||||
// end of coproc mov
|
||||
|
||||
// start of MVE interleaving load/store
|
||||
|
|
|
@ -39,7 +39,6 @@ define <7 x i32> @v7i32(i32 %index, i32 %TC, <7 x i32> %V1, <7 x i32> %V2) {
|
|||
; CHECK-NEXT: adr r3, .LCPI1_0
|
||||
; CHECK-NEXT: vdup.32 q1, r1
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r3]
|
||||
; CHECK-NEXT: ldr r3, [sp, #40]
|
||||
; CHECK-NEXT: vadd.i32 q2, q0, r1
|
||||
; CHECK-NEXT: vdup.32 q0, r2
|
||||
; CHECK-NEXT: vcmp.u32 hi, q1, q2
|
||||
|
@ -47,16 +46,21 @@ define <7 x i32> @v7i32(i32 %index, i32 %TC, <7 x i32> %V1, <7 x i32> %V2) {
|
|||
; CHECK-NEXT: vpnot
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vcmpt.u32 hi, q0, q2
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r3, r2
|
||||
; CHECK-NEXT: vmov.32 q2[0], r2
|
||||
; CHECK-NEXT: ldr r2, [sp, #36]
|
||||
; CHECK-NEXT: ldr r3, [sp, #44]
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r3, r2
|
||||
; CHECK-NEXT: vmov.32 q2[1], r2
|
||||
; CHECK-NEXT: ldr r2, [sp, #40]
|
||||
; CHECK-NEXT: vmov.32 q2[2], r2
|
||||
; CHECK-NEXT: ldr r2, [sp, #44]
|
||||
; CHECK-NEXT: vmov.32 q2[3], r2
|
||||
; CHECK-NEXT: ldr r2, [sp]
|
||||
; CHECK-NEXT: ldr r3, [sp, #8]
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r3, r2
|
||||
; CHECK-NEXT: vmov.32 q3[0], r2
|
||||
; CHECK-NEXT: ldr r2, [sp, #4]
|
||||
; CHECK-NEXT: ldr r3, [sp, #12]
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r3, r2
|
||||
; CHECK-NEXT: vmov.32 q3[1], r2
|
||||
; CHECK-NEXT: ldr r2, [sp, #8]
|
||||
; CHECK-NEXT: vmov.32 q3[2], r2
|
||||
; CHECK-NEXT: ldr r2, [sp, #12]
|
||||
; CHECK-NEXT: vmov.32 q3[3], r2
|
||||
; CHECK-NEXT: adr r2, .LCPI1_1
|
||||
; CHECK-NEXT: vpsel q2, q3, q2
|
||||
; CHECK-NEXT: vstrw.32 q2, [r0]
|
||||
|
@ -66,19 +70,21 @@ define <7 x i32> @v7i32(i32 %index, i32 %TC, <7 x i32> %V1, <7 x i32> %V2) {
|
|||
; CHECK-NEXT: vcmp.u32 hi, q1, q2
|
||||
; CHECK-NEXT: vmrs r1, p0
|
||||
; CHECK-NEXT: eors r1, r2
|
||||
; CHECK-NEXT: ldr r2, [sp, #56]
|
||||
; CHECK-NEXT: vmsr p0, r1
|
||||
; CHECK-NEXT: ldr r1, [sp, #52]
|
||||
; CHECK-NEXT: ldr r1, [sp, #48]
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vcmpt.u32 hi, q0, q2
|
||||
; CHECK-NEXT: vmov.32 q0[0], r1
|
||||
; CHECK-NEXT: ldr r1, [sp, #52]
|
||||
; CHECK-NEXT: vmov.32 q0[1], r1
|
||||
; CHECK-NEXT: ldr r1, [sp, #48]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, r1
|
||||
; CHECK-NEXT: ldr r1, [sp, #20]
|
||||
; CHECK-NEXT: ldr r2, [sp, #24]
|
||||
; CHECK-NEXT: vmov.32 q1[1], r1
|
||||
; CHECK-NEXT: ldr r1, [sp, #56]
|
||||
; CHECK-NEXT: vmov.32 q0[2], r1
|
||||
; CHECK-NEXT: ldr r1, [sp, #16]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r2, r1
|
||||
; CHECK-NEXT: vmov.32 q1[0], r1
|
||||
; CHECK-NEXT: ldr r1, [sp, #20]
|
||||
; CHECK-NEXT: vmov.32 q1[1], r1
|
||||
; CHECK-NEXT: ldr r1, [sp, #24]
|
||||
; CHECK-NEXT: vmov.32 q1[2], r1
|
||||
; CHECK-NEXT: vpsel q0, q1, q0
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov.f32 s2, s1
|
||||
|
@ -407,75 +413,81 @@ define <16 x i8> @v16i8(i32 %index, i32 %TC, <16 x i8> %V1, <16 x i8> %V2) {
|
|||
define void @test_width2(i32* nocapture readnone %x, i32* nocapture %y, i8 zeroext %m) {
|
||||
; CHECK-LABEL: test_width2:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
|
||||
; CHECK-NEXT: sub sp, #4
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: sub sp, #8
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: beq.w .LBB4_3
|
||||
; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
|
||||
; CHECK-NEXT: adds r0, r2, #1
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r2, r2
|
||||
; CHECK-NEXT: bic r0, r0, #1
|
||||
; CHECK-NEXT: adr r2, .LCPI4_0
|
||||
; CHECK-NEXT: subs r0, #2
|
||||
; CHECK-NEXT: movs r3, #1
|
||||
; CHECK-NEXT: bic r0, r0, #1
|
||||
; CHECK-NEXT: vmov.32 q2[0], r2
|
||||
; CHECK-NEXT: subs r0, #2
|
||||
; CHECK-NEXT: vmov.i64 q0, #0xffffffff
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r2]
|
||||
; CHECK-NEXT: vmov.32 q2[2], r2
|
||||
; CHECK-NEXT: movs r6, #0
|
||||
; CHECK-NEXT: add.w lr, r3, r0, lsr #1
|
||||
; CHECK-NEXT: mov.w r8, #0
|
||||
; CHECK-NEXT: adr r3, .LCPI4_0
|
||||
; CHECK-NEXT: dls lr, lr
|
||||
; CHECK-NEXT: vand q1, q1, q0
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r3]
|
||||
; CHECK-NEXT: vand q2, q2, q0
|
||||
; CHECK-NEXT: .LBB4_2: @ %vector.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r8, r8
|
||||
; CHECK-NEXT: vmov r7, s4
|
||||
; CHECK-NEXT: vmov.32 q3[0], r6
|
||||
; CHECK-NEXT: vmov r5, s8
|
||||
; CHECK-NEXT: vmov.32 q3[2], r6
|
||||
; CHECK-NEXT: vmov r0, s9
|
||||
; CHECK-NEXT: vand q3, q3, q0
|
||||
; CHECK-NEXT: vmov r6, s5
|
||||
; CHECK-NEXT: adds r6, #2
|
||||
; CHECK-NEXT: vmov r3, s14
|
||||
; CHECK-NEXT: add.w r8, r8, #2
|
||||
; CHECK-NEXT: vmov r9, s12
|
||||
; CHECK-NEXT: vmov r2, s15
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: adds r3, #1
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r3, r9
|
||||
; CHECK-NEXT: vand q3, q3, q0
|
||||
; CHECK-NEXT: adc r12, r2, #0
|
||||
; CHECK-NEXT: vmov r5, s12
|
||||
; CHECK-NEXT: vmov r4, s13
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: subs r7, r5, r7
|
||||
; CHECK-NEXT: vmov r7, s14
|
||||
; CHECK-NEXT: sbcs r4, r6
|
||||
; CHECK-NEXT: vmov r6, s15
|
||||
; CHECK-NEXT: mov.w r4, #0
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r4, #1
|
||||
; CHECK-NEXT: cmp r4, #0
|
||||
; CHECK-NEXT: csetm r4, ne
|
||||
; CHECK-NEXT: subs r2, r7, r2
|
||||
; CHECK-NEXT: sbcs.w r0, r6, r0
|
||||
; CHECK-NEXT: vmov r2, s12
|
||||
; CHECK-NEXT: vmov.32 q3[0], r2
|
||||
; CHECK-NEXT: vmov.32 q3[2], r3
|
||||
; CHECK-NEXT: vand q3, q3, q0
|
||||
; CHECK-NEXT: vmov r4, s12
|
||||
; CHECK-NEXT: teq.w r4, r2
|
||||
; CHECK-NEXT: cset r2, ne
|
||||
; CHECK-NEXT: tst.w r2, #1
|
||||
; CHECK-NEXT: csetm r2, ne
|
||||
; CHECK-NEXT: vmov.32 q4[0], r2
|
||||
; CHECK-NEXT: vmov.32 q4[1], r2
|
||||
; CHECK-NEXT: vmov r2, s14
|
||||
; CHECK-NEXT: eors r3, r2
|
||||
; CHECK-NEXT: orrs.w r3, r3, r12
|
||||
; CHECK-NEXT: cset r3, ne
|
||||
; CHECK-NEXT: tst.w r3, #1
|
||||
; CHECK-NEXT: csetm r3, ne
|
||||
; CHECK-NEXT: subs r5, r4, r5
|
||||
; CHECK-NEXT: vmov.32 q4[2], r3
|
||||
; CHECK-NEXT: vmov r5, s10
|
||||
; CHECK-NEXT: vmov.32 q4[3], r3
|
||||
; CHECK-NEXT: vmov r3, s13
|
||||
; CHECK-NEXT: veor q4, q4, q1
|
||||
; CHECK-NEXT: sbcs.w r0, r3, r0
|
||||
; CHECK-NEXT: vmov r3, s11
|
||||
; CHECK-NEXT: mov.w r0, #0
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r0, #1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r0, r4
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r0, r4
|
||||
; CHECK-NEXT: eor.w r0, r7, r3
|
||||
; CHECK-NEXT: orrs.w r0, r0, r12
|
||||
; CHECK-NEXT: cset r0, ne
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: teq.w r5, r9
|
||||
; CHECK-NEXT: cset r2, ne
|
||||
; CHECK-NEXT: tst.w r2, #1
|
||||
; CHECK-NEXT: csetm r2, ne
|
||||
; CHECK-NEXT: vmov q4[2], q4[0], r0, r2
|
||||
; CHECK-NEXT: vmov q4[3], q4[1], r0, r2
|
||||
; CHECK-NEXT: veor q4, q4, q2
|
||||
; CHECK-NEXT: vand q4, q4, q3
|
||||
; CHECK-NEXT: subs r2, r2, r5
|
||||
; CHECK-NEXT: vmov.32 q5[0], r0
|
||||
; CHECK-NEXT: vmov.32 q5[1], r0
|
||||
; CHECK-NEXT: vmov r0, s15
|
||||
; CHECK-NEXT: @ implicit-def: $q3
|
||||
; CHECK-NEXT: sbcs r0, r3
|
||||
; CHECK-NEXT: mov.w r0, #0
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r0, #1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q5[2], r0
|
||||
; CHECK-NEXT: vmov.32 q5[3], r0
|
||||
; CHECK-NEXT: vand q4, q4, q5
|
||||
; CHECK-NEXT: vmov r2, s16
|
||||
; CHECK-NEXT: vmov r0, s18
|
||||
; CHECK-NEXT: and r2, r2, #1
|
||||
|
@ -507,9 +519,8 @@ define void @test_width2(i32* nocapture readnone %x, i32* nocapture %y, i8 zeroe
|
|||
; CHECK-NEXT: le lr, .LBB4_2
|
||||
; CHECK-NEXT: .LBB4_3: @ %for.cond.cleanup
|
||||
; CHECK-NEXT: add sp, #8
|
||||
; CHECK-NEXT: vpop {d8, d9}
|
||||
; CHECK-NEXT: add sp, #4
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: pop {r4, r5, r6, pc}
|
||||
; CHECK-NEXT: .p2align 4
|
||||
; CHECK-NEXT: @ %bb.4:
|
||||
; CHECK-NEXT: .LCPI4_0:
|
||||
|
|
|
@ -42,18 +42,21 @@ define arm_aapcs_vfpcc <2 x i64> @abs_v2i64(<2 x i64> %s1) {
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: adds.w r1, r1, r0, asr #31
|
||||
; CHECK-NEXT: adc.w r12, r0, r0, asr #31
|
||||
; CHECK-NEXT: adc.w r2, r0, r0, asr #31
|
||||
; CHECK-NEXT: eor.w r2, r2, r0, asr #31
|
||||
; CHECK-NEXT: eor.w r0, r1, r0, asr #31
|
||||
; CHECK-NEXT: vmov.32 q1[0], r0
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov.32 q1[1], r2
|
||||
; CHECK-NEXT: adds.w r1, r1, r0, asr #31
|
||||
; CHECK-NEXT: eor.w r1, r1, r0, asr #31
|
||||
; CHECK-NEXT: adds.w r2, r2, r3, asr #31
|
||||
; CHECK-NEXT: eor.w r0, r12, r0, asr #31
|
||||
; CHECK-NEXT: eor.w r2, r2, r3, asr #31
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, r1
|
||||
; CHECK-NEXT: adc.w r1, r3, r3, asr #31
|
||||
; CHECK-NEXT: eor.w r1, r1, r3, asr #31
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q1[2], r1
|
||||
; CHECK-NEXT: adc.w r1, r0, r0, asr #31
|
||||
; CHECK-NEXT: eor.w r0, r1, r0, asr #31
|
||||
; CHECK-NEXT: vmov.32 q1[3], r0
|
||||
; CHECK-NEXT: vmov q0, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = icmp slt <2 x i64> %s1, zeroinitializer
|
||||
|
|
|
@ -8,17 +8,19 @@ define arm_aapcs_vfpcc <4 x i32> @udiv_i32(<4 x i32> %in1, <4 x i32> %in2) {
|
|||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: udiv r0, r1, r0
|
||||
; CHECK-NEXT: vmov r1, s6
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: vmov.32 q2[0], r0
|
||||
; CHECK-NEXT: udiv r1, r2, r1
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov.32 q2[1], r1
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: udiv r0, r1, r0
|
||||
; CHECK-NEXT: vmov r1, s7
|
||||
; CHECK-NEXT: udiv r1, r2, r1
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov.32 q2[2], r0
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: udiv r0, r1, r0
|
||||
; CHECK-NEXT: vmov.32 q2[3], r0
|
||||
; CHECK-NEXT: vmov q0, q2
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
|
@ -32,17 +34,19 @@ define arm_aapcs_vfpcc <4 x i32> @sdiv_i32(<4 x i32> %in1, <4 x i32> %in2) {
|
|||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: sdiv r0, r1, r0
|
||||
; CHECK-NEXT: vmov r1, s6
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: vmov.32 q2[0], r0
|
||||
; CHECK-NEXT: sdiv r1, r2, r1
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov.32 q2[1], r1
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: sdiv r0, r1, r0
|
||||
; CHECK-NEXT: vmov r1, s7
|
||||
; CHECK-NEXT: sdiv r1, r2, r1
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov.32 q2[2], r0
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: sdiv r0, r1, r0
|
||||
; CHECK-NEXT: vmov.32 q2[3], r0
|
||||
; CHECK-NEXT: vmov q0, q2
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
|
@ -59,20 +63,22 @@ define arm_aapcs_vfpcc <4 x i32> @urem_i32(<4 x i32> %in1, <4 x i32> %in2) {
|
|||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: udiv r2, r1, r0
|
||||
; CHECK-NEXT: mls r12, r2, r0, r1
|
||||
; CHECK-NEXT: vmov r1, s6
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: udiv r3, r2, r1
|
||||
; CHECK-NEXT: mls lr, r3, r1, r2
|
||||
; CHECK-NEXT: vmov r2, s5
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: udiv r0, r3, r2
|
||||
; CHECK-NEXT: mls r0, r0, r2, r3
|
||||
; CHECK-NEXT: vmov r2, s7
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], lr, r12
|
||||
; CHECK-NEXT: vmov.32 q0[0], r12
|
||||
; CHECK-NEXT: udiv r1, r3, r2
|
||||
; CHECK-NEXT: vmov.32 q0[1], lr
|
||||
; CHECK-NEXT: vmov.32 q0[2], r0
|
||||
; CHECK-NEXT: mls r1, r1, r2, r3
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q0[3], r1
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
entry:
|
||||
%out = urem <4 x i32> %in1, %in2
|
||||
|
@ -88,20 +94,22 @@ define arm_aapcs_vfpcc <4 x i32> @srem_i32(<4 x i32> %in1, <4 x i32> %in2) {
|
|||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: sdiv r2, r1, r0
|
||||
; CHECK-NEXT: mls r12, r2, r0, r1
|
||||
; CHECK-NEXT: vmov r1, s6
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: sdiv r3, r2, r1
|
||||
; CHECK-NEXT: mls lr, r3, r1, r2
|
||||
; CHECK-NEXT: vmov r2, s5
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: sdiv r0, r3, r2
|
||||
; CHECK-NEXT: mls r0, r0, r2, r3
|
||||
; CHECK-NEXT: vmov r2, s7
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], lr, r12
|
||||
; CHECK-NEXT: vmov.32 q0[0], r12
|
||||
; CHECK-NEXT: sdiv r1, r3, r2
|
||||
; CHECK-NEXT: vmov.32 q0[1], lr
|
||||
; CHECK-NEXT: vmov.32 q0[2], r0
|
||||
; CHECK-NEXT: mls r1, r1, r2, r3
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q0[3], r1
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
entry:
|
||||
%out = srem <4 x i32> %in1, %in2
|
||||
|
@ -731,8 +739,8 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @udiv_i64(<2 x i64> %in1, <2 x i64> %in2) {
|
||||
; CHECK-LABEL: udiv_i64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vmov q4, q1
|
||||
|
@ -742,17 +750,20 @@ define arm_aapcs_vfpcc <2 x i64> @udiv_i64(<2 x i64> %in1, <2 x i64> %in2) {
|
|||
; CHECK-NEXT: vmov r2, s16
|
||||
; CHECK-NEXT: vmov r3, s17
|
||||
; CHECK-NEXT: bl __aeabi_uldivmod
|
||||
; CHECK-NEXT: mov r4, r0
|
||||
; CHECK-NEXT: mov r5, r1
|
||||
; CHECK-NEXT: vmov r0, s22
|
||||
; CHECK-NEXT: vmov r1, s23
|
||||
; CHECK-NEXT: vmov r12, s22
|
||||
; CHECK-NEXT: vmov lr, s23
|
||||
; CHECK-NEXT: vmov r2, s18
|
||||
; CHECK-NEXT: vmov r3, s19
|
||||
; CHECK-NEXT: vmov.32 q4[0], r0
|
||||
; CHECK-NEXT: vmov.32 q4[1], r1
|
||||
; CHECK-NEXT: mov r0, r12
|
||||
; CHECK-NEXT: mov r1, lr
|
||||
; CHECK-NEXT: bl __aeabi_uldivmod
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, r4
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r5
|
||||
; CHECK-NEXT: vmov.32 q4[2], r0
|
||||
; CHECK-NEXT: vmov.32 q4[3], r1
|
||||
; CHECK-NEXT: vmov q0, q4
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
entry:
|
||||
%out = udiv <2 x i64> %in1, %in2
|
||||
ret <2 x i64> %out
|
||||
|
@ -761,8 +772,8 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @sdiv_i64(<2 x i64> %in1, <2 x i64> %in2) {
|
||||
; CHECK-LABEL: sdiv_i64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vmov q4, q1
|
||||
|
@ -772,17 +783,20 @@ define arm_aapcs_vfpcc <2 x i64> @sdiv_i64(<2 x i64> %in1, <2 x i64> %in2) {
|
|||
; CHECK-NEXT: vmov r2, s16
|
||||
; CHECK-NEXT: vmov r3, s17
|
||||
; CHECK-NEXT: bl __aeabi_ldivmod
|
||||
; CHECK-NEXT: mov r4, r0
|
||||
; CHECK-NEXT: mov r5, r1
|
||||
; CHECK-NEXT: vmov r0, s22
|
||||
; CHECK-NEXT: vmov r1, s23
|
||||
; CHECK-NEXT: vmov r12, s22
|
||||
; CHECK-NEXT: vmov lr, s23
|
||||
; CHECK-NEXT: vmov r2, s18
|
||||
; CHECK-NEXT: vmov r3, s19
|
||||
; CHECK-NEXT: vmov.32 q4[0], r0
|
||||
; CHECK-NEXT: vmov.32 q4[1], r1
|
||||
; CHECK-NEXT: mov r0, r12
|
||||
; CHECK-NEXT: mov r1, lr
|
||||
; CHECK-NEXT: bl __aeabi_ldivmod
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, r4
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r5
|
||||
; CHECK-NEXT: vmov.32 q4[2], r0
|
||||
; CHECK-NEXT: vmov.32 q4[3], r1
|
||||
; CHECK-NEXT: vmov q0, q4
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
entry:
|
||||
%out = sdiv <2 x i64> %in1, %in2
|
||||
ret <2 x i64> %out
|
||||
|
@ -791,8 +805,8 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @urem_i64(<2 x i64> %in1, <2 x i64> %in2) {
|
||||
; CHECK-LABEL: urem_i64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vmov q4, q1
|
||||
|
@ -802,17 +816,20 @@ define arm_aapcs_vfpcc <2 x i64> @urem_i64(<2 x i64> %in1, <2 x i64> %in2) {
|
|||
; CHECK-NEXT: vmov r2, s16
|
||||
; CHECK-NEXT: vmov r3, s17
|
||||
; CHECK-NEXT: bl __aeabi_uldivmod
|
||||
; CHECK-NEXT: mov r4, r2
|
||||
; CHECK-NEXT: mov r5, r3
|
||||
; CHECK-NEXT: vmov r12, s18
|
||||
; CHECK-NEXT: vmov lr, s19
|
||||
; CHECK-NEXT: vmov.32 q4[0], r2
|
||||
; CHECK-NEXT: vmov r0, s22
|
||||
; CHECK-NEXT: vmov.32 q4[1], r3
|
||||
; CHECK-NEXT: vmov r1, s23
|
||||
; CHECK-NEXT: vmov r2, s18
|
||||
; CHECK-NEXT: vmov r3, s19
|
||||
; CHECK-NEXT: mov r2, r12
|
||||
; CHECK-NEXT: mov r3, lr
|
||||
; CHECK-NEXT: bl __aeabi_uldivmod
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, r4
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r5
|
||||
; CHECK-NEXT: vmov.32 q4[2], r2
|
||||
; CHECK-NEXT: vmov.32 q4[3], r3
|
||||
; CHECK-NEXT: vmov q0, q4
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
entry:
|
||||
%out = urem <2 x i64> %in1, %in2
|
||||
ret <2 x i64> %out
|
||||
|
@ -821,8 +838,8 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @srem_i64(<2 x i64> %in1, <2 x i64> %in2) {
|
||||
; CHECK-LABEL: srem_i64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vmov q4, q1
|
||||
|
@ -832,17 +849,20 @@ define arm_aapcs_vfpcc <2 x i64> @srem_i64(<2 x i64> %in1, <2 x i64> %in2) {
|
|||
; CHECK-NEXT: vmov r2, s16
|
||||
; CHECK-NEXT: vmov r3, s17
|
||||
; CHECK-NEXT: bl __aeabi_ldivmod
|
||||
; CHECK-NEXT: mov r4, r2
|
||||
; CHECK-NEXT: mov r5, r3
|
||||
; CHECK-NEXT: vmov r12, s18
|
||||
; CHECK-NEXT: vmov lr, s19
|
||||
; CHECK-NEXT: vmov.32 q4[0], r2
|
||||
; CHECK-NEXT: vmov r0, s22
|
||||
; CHECK-NEXT: vmov.32 q4[1], r3
|
||||
; CHECK-NEXT: vmov r1, s23
|
||||
; CHECK-NEXT: vmov r2, s18
|
||||
; CHECK-NEXT: vmov r3, s19
|
||||
; CHECK-NEXT: mov r2, r12
|
||||
; CHECK-NEXT: mov r3, lr
|
||||
; CHECK-NEXT: bl __aeabi_ldivmod
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, r4
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r5
|
||||
; CHECK-NEXT: vmov.32 q4[2], r2
|
||||
; CHECK-NEXT: vmov.32 q4[3], r3
|
||||
; CHECK-NEXT: vmov q0, q4
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
entry:
|
||||
%out = srem <2 x i64> %in1, %in2
|
||||
ret <2 x i64> %out
|
||||
|
|
|
@ -589,46 +589,50 @@ define arm_aapcs_vfpcc void @gather_inc_v8i16_simple(i16* noalias nocapture read
|
|||
; CHECK-NEXT: @ Parent Loop BB11_2 Depth=1
|
||||
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
|
||||
; CHECK-NEXT: vmov.u16 r7, q2[4]
|
||||
; CHECK-NEXT: vmov.u16 r3, q2[6]
|
||||
; CHECK-NEXT: vmov q4[2], q4[0], r3, r7
|
||||
; CHECK-NEXT: vmov.u16 r3, q2[5]
|
||||
; CHECK-NEXT: vmov.u16 r7, q2[7]
|
||||
; CHECK-NEXT: vmov.u16 r5, q2[0]
|
||||
; CHECK-NEXT: vmov q4[3], q4[1], r7, r3
|
||||
; CHECK-NEXT: vmov.u16 r6, q2[2]
|
||||
; CHECK-NEXT: vmov.32 q4[0], r7
|
||||
; CHECK-NEXT: vmov.u16 r7, q2[5]
|
||||
; CHECK-NEXT: vmov.32 q4[1], r7
|
||||
; CHECK-NEXT: vmov.u16 r7, q2[6]
|
||||
; CHECK-NEXT: vmov.32 q4[2], r7
|
||||
; CHECK-NEXT: vmov.u16 r7, q2[7]
|
||||
; CHECK-NEXT: vmov.32 q4[3], r7
|
||||
; CHECK-NEXT: vmov.32 q3[0], r5
|
||||
; CHECK-NEXT: vmovlb.s16 q4, q4
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r6, r5
|
||||
; CHECK-NEXT: vshl.i32 q4, q4, #1
|
||||
; CHECK-NEXT: vmov.u16 r5, q2[1]
|
||||
; CHECK-NEXT: vshl.i32 q4, q4, #1
|
||||
; CHECK-NEXT: vmov.32 q3[1], r5
|
||||
; CHECK-NEXT: vadd.i32 q4, q4, r0
|
||||
; CHECK-NEXT: vmov.u16 r6, q2[3]
|
||||
; CHECK-NEXT: vmov r3, s16
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r6, r5
|
||||
; CHECK-NEXT: vmovlb.s16 q3, q3
|
||||
; CHECK-NEXT: vmov r7, s17
|
||||
; CHECK-NEXT: vshl.i32 q3, q3, #1
|
||||
; CHECK-NEXT: vmov.u16 r5, q2[2]
|
||||
; CHECK-NEXT: vmov r7, s16
|
||||
; CHECK-NEXT: vmov.32 q3[2], r5
|
||||
; CHECK-NEXT: vmov.u16 r5, q2[3]
|
||||
; CHECK-NEXT: vmov r3, s17
|
||||
; CHECK-NEXT: vmov.32 q3[3], r5
|
||||
; CHECK-NEXT: vadd.i16 q2, q2, q1
|
||||
; CHECK-NEXT: vmovlb.s16 q3, q3
|
||||
; CHECK-NEXT: vshl.i32 q3, q3, #1
|
||||
; CHECK-NEXT: vadd.i32 q3, q3, r0
|
||||
; CHECK-NEXT: vmov r5, s15
|
||||
; CHECK-NEXT: vmov r6, s14
|
||||
; CHECK-NEXT: vmov r12, s13
|
||||
; CHECK-NEXT: ldrh.w r11, [r3]
|
||||
; CHECK-NEXT: vmov r3, s12
|
||||
; CHECK-NEXT: ldrh r7, [r7]
|
||||
; CHECK-NEXT: ldrh.w r11, [r7]
|
||||
; CHECK-NEXT: vmov r7, s12
|
||||
; CHECK-NEXT: ldrh r3, [r3]
|
||||
; CHECK-NEXT: ldrh.w r9, [r5]
|
||||
; CHECK-NEXT: vmov r5, s18
|
||||
; CHECK-NEXT: ldrh.w r10, [r6]
|
||||
; CHECK-NEXT: vmov r6, s19
|
||||
; CHECK-NEXT: ldrh.w r1, [r12]
|
||||
; CHECK-NEXT: ldrh r3, [r3]
|
||||
; CHECK-NEXT: vmov.16 q3[0], r3
|
||||
; CHECK-NEXT: ldrh r7, [r7]
|
||||
; CHECK-NEXT: vmov.16 q3[0], r7
|
||||
; CHECK-NEXT: vmov.16 q3[1], r1
|
||||
; CHECK-NEXT: vmov.16 q3[2], r10
|
||||
; CHECK-NEXT: ldrh r5, [r5]
|
||||
; CHECK-NEXT: vmov.16 q3[3], r9
|
||||
; CHECK-NEXT: ldrh r6, [r6]
|
||||
; CHECK-NEXT: vmov.16 q3[4], r11
|
||||
; CHECK-NEXT: vmov.16 q3[5], r7
|
||||
; CHECK-NEXT: ldrh r5, [r5]
|
||||
; CHECK-NEXT: vmov.16 q3[5], r3
|
||||
; CHECK-NEXT: ldrh r6, [r6]
|
||||
; CHECK-NEXT: vmov.16 q3[6], r5
|
||||
; CHECK-NEXT: vmov.16 q3[7], r6
|
||||
; CHECK-NEXT: vstrb.8 q3, [r4], #16
|
||||
|
@ -700,27 +704,26 @@ define arm_aapcs_vfpcc void @gather_inc_v8i16_complex(i16* noalias nocapture rea
|
|||
; CHECK-NEXT: str r1, [sp, #60] @ 4-byte Spill
|
||||
; CHECK-NEXT: blt.w .LBB12_5
|
||||
; CHECK-NEXT: @ %bb.1: @ %vector.ph.preheader
|
||||
; CHECK-NEXT: bic r1, r2, #7
|
||||
; CHECK-NEXT: bic r8, r2, #7
|
||||
; CHECK-NEXT: adr r6, .LCPI12_2
|
||||
; CHECK-NEXT: sub.w r3, r1, #8
|
||||
; CHECK-NEXT: sub.w r3, r8, #8
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r6]
|
||||
; CHECK-NEXT: str r1, [sp, #56] @ 4-byte Spill
|
||||
; CHECK-NEXT: movs r7, #1
|
||||
; CHECK-NEXT: vmov.i16 q3, #0x18
|
||||
; CHECK-NEXT: add.w r1, r7, r3, lsr #3
|
||||
; CHECK-NEXT: adr r3, .LCPI12_0
|
||||
; CHECK-NEXT: vstrw.32 q0, [sp, #32] @ 16-byte Spill
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r3]
|
||||
; CHECK-NEXT: adr r7, .LCPI12_1
|
||||
; CHECK-NEXT: vmov.i16 q3, #0x18
|
||||
; CHECK-NEXT: str r1, [sp, #56] @ 4-byte Spill
|
||||
; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r7]
|
||||
; CHECK-NEXT: str r1, [sp, #52] @ 4-byte Spill
|
||||
; CHECK-NEXT: vstrw.32 q3, [sp, #64] @ 16-byte Spill
|
||||
; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill
|
||||
; CHECK-NEXT: .LBB12_2: @ %vector.ph
|
||||
; CHECK-NEXT: @ =>This Loop Header: Depth=1
|
||||
; CHECK-NEXT: @ Child Loop BB12_3 Depth 2
|
||||
; CHECK-NEXT: ldr r1, [sp, #52] @ 4-byte Reload
|
||||
; CHECK-NEXT: ldr r1, [sp, #56] @ 4-byte Reload
|
||||
; CHECK-NEXT: dls lr, r1
|
||||
; CHECK-NEXT: ldr r4, [sp, #60] @ 4-byte Reload
|
||||
; CHECK-NEXT: vldrw.u32 q7, [sp, #16] @ 16-byte Reload
|
||||
|
@ -730,27 +733,41 @@ define arm_aapcs_vfpcc void @gather_inc_v8i16_complex(i16* noalias nocapture rea
|
|||
; CHECK-NEXT: @ Parent Loop BB12_2 Depth=1
|
||||
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
|
||||
; CHECK-NEXT: vmov.u16 r3, q5[0]
|
||||
; CHECK-NEXT: vmov.u16 r5, q5[2]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r5, r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q5[1]
|
||||
; CHECK-NEXT: vmov.u16 r5, q5[3]
|
||||
; CHECK-NEXT: vmov.u16 r7, q7[4]
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r5, r3
|
||||
; CHECK-NEXT: vmov.u16 r5, q5[6]
|
||||
; CHECK-NEXT: vmov.32 q0[0], r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q5[1]
|
||||
; CHECK-NEXT: vmov.32 q0[1], r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q5[2]
|
||||
; CHECK-NEXT: vmov.32 q0[2], r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q5[3]
|
||||
; CHECK-NEXT: vmov.32 q0[3], r3
|
||||
; CHECK-NEXT: vmov.u16 r12, q6[0]
|
||||
; CHECK-NEXT: vmovlb.s16 q0, q0
|
||||
; CHECK-NEXT: vmov.u16 r12, q7[6]
|
||||
; CHECK-NEXT: vmov.32 q1[0], r12
|
||||
; CHECK-NEXT: vshl.i32 q0, q0, #1
|
||||
; CHECK-NEXT: vmov.u16 r1, q7[7]
|
||||
; CHECK-NEXT: vmov.u16 r1, q6[1]
|
||||
; CHECK-NEXT: vadd.i32 q2, q0, r0
|
||||
; CHECK-NEXT: vmov.32 q1[1], r1
|
||||
; CHECK-NEXT: vmov r3, s10
|
||||
; CHECK-NEXT: vmov.u16 r1, q6[2]
|
||||
; CHECK-NEXT: vmov.32 q1[2], r1
|
||||
; CHECK-NEXT: vmov.u16 r1, q6[3]
|
||||
; CHECK-NEXT: vmov.32 q1[3], r1
|
||||
; CHECK-NEXT: vmov.u16 r1, q6[4]
|
||||
; CHECK-NEXT: vmovlb.s16 q1, q1
|
||||
; CHECK-NEXT: vmov r6, s11
|
||||
; CHECK-NEXT: vshl.i32 q1, q1, #1
|
||||
; CHECK-NEXT: vadd.i32 q4, q1, r0
|
||||
; CHECK-NEXT: ldrh.w r9, [r3]
|
||||
; CHECK-NEXT: vmov.u16 r3, q5[4]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r5, r3
|
||||
; CHECK-NEXT: vmov.32 q0[0], r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q5[5]
|
||||
; CHECK-NEXT: vmov.u16 r5, q5[7]
|
||||
; CHECK-NEXT: vmov.32 q0[1], r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q5[6]
|
||||
; CHECK-NEXT: vmov.32 q0[2], r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q5[7]
|
||||
; CHECK-NEXT: vmov.32 q0[3], r3
|
||||
; CHECK-NEXT: ldrh r6, [r6]
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r5, r3
|
||||
; CHECK-NEXT: vmovlb.s16 q0, q0
|
||||
; CHECK-NEXT: vshl.i32 q0, q0, #1
|
||||
; CHECK-NEXT: vadd.i32 q0, q0, r0
|
||||
|
@ -761,44 +778,42 @@ define arm_aapcs_vfpcc void @gather_inc_v8i16_complex(i16* noalias nocapture rea
|
|||
; CHECK-NEXT: ldrh r5, [r5]
|
||||
; CHECK-NEXT: ldrh.w r11, [r3]
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r12, r7
|
||||
; CHECK-NEXT: vmov.32 q0[0], r7
|
||||
; CHECK-NEXT: vmov.u16 r7, q7[5]
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r7
|
||||
; CHECK-NEXT: vmov.32 q0[1], r7
|
||||
; CHECK-NEXT: vmov.u16 r7, q7[6]
|
||||
; CHECK-NEXT: vmov.32 q0[2], r7
|
||||
; CHECK-NEXT: vmov.u16 r7, q7[7]
|
||||
; CHECK-NEXT: vmov.32 q0[3], r7
|
||||
; CHECK-NEXT: vmovlb.s16 q0, q0
|
||||
; CHECK-NEXT: vshl.i32 q0, q0, #1
|
||||
; CHECK-NEXT: vadd.i32 q0, q0, r0
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r7, s2
|
||||
; CHECK-NEXT: vstrw.32 q0, [sp, #80] @ 16-byte Spill
|
||||
; CHECK-NEXT: ldrh.w r8, [r3]
|
||||
; CHECK-NEXT: vmov.u16 r3, q6[2]
|
||||
; CHECK-NEXT: ldrh r7, [r1]
|
||||
; CHECK-NEXT: vmov.u16 r1, q6[0]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r1
|
||||
; CHECK-NEXT: vmov.u16 r1, q6[1]
|
||||
; CHECK-NEXT: vmov.u16 r3, q6[3]
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r3, r1
|
||||
; CHECK-NEXT: vmov.u16 r1, q6[4]
|
||||
; CHECK-NEXT: vmov.u16 r3, q6[6]
|
||||
; CHECK-NEXT: vmovlb.s16 q1, q1
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r3, r1
|
||||
; CHECK-NEXT: vmov.32 q0[0], r1
|
||||
; CHECK-NEXT: vmov.u16 r1, q6[5]
|
||||
; CHECK-NEXT: vmov.u16 r3, q6[7]
|
||||
; CHECK-NEXT: vshl.i32 q1, q1, #1
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r1
|
||||
; CHECK-NEXT: vmov.32 q0[1], r1
|
||||
; CHECK-NEXT: vmov.u16 r1, q6[6]
|
||||
; CHECK-NEXT: vmov.32 q0[2], r1
|
||||
; CHECK-NEXT: vmov.u16 r1, q6[7]
|
||||
; CHECK-NEXT: vmov.32 q0[3], r1
|
||||
; CHECK-NEXT: vmov.u16 r1, q7[0]
|
||||
; CHECK-NEXT: vmov.u16 r3, q7[2]
|
||||
; CHECK-NEXT: vadd.i32 q4, q1, r0
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r3, r1
|
||||
; CHECK-NEXT: vmov.32 q3[0], r1
|
||||
; CHECK-NEXT: vmov.u16 r1, q7[1]
|
||||
; CHECK-NEXT: vmov.u16 r3, q7[3]
|
||||
; CHECK-NEXT: vmovlb.s16 q0, q0
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r3, r1
|
||||
; CHECK-NEXT: vmov.32 q3[1], r1
|
||||
; CHECK-NEXT: vmov.u16 r1, q7[2]
|
||||
; CHECK-NEXT: vmov.32 q3[2], r1
|
||||
; CHECK-NEXT: vmov.u16 r1, q7[3]
|
||||
; CHECK-NEXT: vmov.32 q3[3], r1
|
||||
; CHECK-NEXT: vmov r1, s8
|
||||
; CHECK-NEXT: vshl.i32 q0, q0, #1
|
||||
; CHECK-NEXT: vmovlb.s16 q0, q0
|
||||
; CHECK-NEXT: vmovlb.s16 q3, q3
|
||||
; CHECK-NEXT: vadd.i32 q0, q0, r0
|
||||
; CHECK-NEXT: vshl.i32 q0, q0, #1
|
||||
; CHECK-NEXT: vshl.i32 q3, q3, #1
|
||||
; CHECK-NEXT: vadd.i32 q0, q0, r0
|
||||
; CHECK-NEXT: vadd.i32 q3, q3, r0
|
||||
; CHECK-NEXT: ldrh r3, [r3]
|
||||
; CHECK-NEXT: ldrh r7, [r7]
|
||||
; CHECK-NEXT: ldrh r1, [r1]
|
||||
; CHECK-NEXT: vmov.16 q1[0], r1
|
||||
; CHECK-NEXT: vmov r1, s9
|
||||
|
@ -809,7 +824,7 @@ define arm_aapcs_vfpcc void @gather_inc_v8i16_complex(i16* noalias nocapture rea
|
|||
; CHECK-NEXT: vmov.16 q1[3], r6
|
||||
; CHECK-NEXT: vmov.16 q1[4], r10
|
||||
; CHECK-NEXT: vmov.16 q1[5], r11
|
||||
; CHECK-NEXT: vmov.16 q1[6], r8
|
||||
; CHECK-NEXT: vmov.16 q1[6], r3
|
||||
; CHECK-NEXT: vmov.16 q1[7], r5
|
||||
; CHECK-NEXT: ldrh r1, [r1]
|
||||
; CHECK-NEXT: vmov.16 q2[0], r1
|
||||
|
@ -867,8 +882,7 @@ define arm_aapcs_vfpcc void @gather_inc_v8i16_complex(i16* noalias nocapture rea
|
|||
; CHECK-NEXT: le lr, .LBB12_3
|
||||
; CHECK-NEXT: @ %bb.4: @ %middle.block
|
||||
; CHECK-NEXT: @ in Loop: Header=BB12_2 Depth=1
|
||||
; CHECK-NEXT: ldr r1, [sp, #56] @ 4-byte Reload
|
||||
; CHECK-NEXT: cmp r1, r2
|
||||
; CHECK-NEXT: cmp r8, r2
|
||||
; CHECK-NEXT: bne.w .LBB12_2
|
||||
; CHECK-NEXT: .LBB12_5: @ %for.cond.cleanup
|
||||
; CHECK-NEXT: add sp, #104
|
||||
|
|
|
@ -462,15 +462,17 @@ define arm_aapcs_vfpcc <4 x i32> @qi4(<4 x i32*> %p) {
|
|||
; CHECK-NEXT: vmov.i32 q1, #0x10
|
||||
; CHECK-NEXT: vadd.i32 q0, q0, q1
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: ldr r0, [r0]
|
||||
; CHECK-NEXT: ldr r1, [r1]
|
||||
; CHECK-NEXT: ldr r2, [r2]
|
||||
; CHECK-NEXT: ldr r3, [r3]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
|
||||
; CHECK-NEXT: vmov.32 q0[0], r0
|
||||
; CHECK-NEXT: ldr r1, [r1]
|
||||
; CHECK-NEXT: vmov.32 q0[1], r3
|
||||
; CHECK-NEXT: ldr r2, [r2]
|
||||
; CHECK-NEXT: vmov.32 q0[2], r1
|
||||
; CHECK-NEXT: vmov.32 q0[3], r2
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%g = getelementptr inbounds i32, <4 x i32*> %p, i32 4
|
||||
|
|
|
@ -63,13 +63,15 @@ define arm_aapcs_vfpcc <2 x i8> @unscaled_v2i8_i8(i8* %base, <2 x i8>* %offptr)
|
|||
; CHECK-NEXT: ldrb r2, [r1]
|
||||
; CHECK-NEXT: vmov.i32 q0, #0xff
|
||||
; CHECK-NEXT: ldrb r1, [r1, #1]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r1, r2
|
||||
; CHECK-NEXT: vmov.32 q1[0], r2
|
||||
; CHECK-NEXT: vmov.32 q1[2], r1
|
||||
; CHECK-NEXT: vand q0, q1, q0
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: ldrb r1, [r0, r1]
|
||||
; CHECK-NEXT: ldrb r0, [r0, r2]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
|
||||
; CHECK-NEXT: vmov.32 q0[0], r1
|
||||
; CHECK-NEXT: vmov.32 q0[2], r0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%offs = load <2 x i8>, <2 x i8>* %offptr, align 1
|
||||
|
|
|
@ -9,7 +9,8 @@ define arm_aapcs_vfpcc <2 x i32> @ptr_v2i32(<2 x i32*>* %offptr) {
|
|||
; CHECK-NEXT: ldrd r1, r0, [r0]
|
||||
; CHECK-NEXT: ldr r0, [r0]
|
||||
; CHECK-NEXT: ldr r1, [r1]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
|
||||
; CHECK-NEXT: vmov.32 q0[0], r1
|
||||
; CHECK-NEXT: vmov.32 q0[2], r0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%offs = load <2 x i32*>, <2 x i32*>* %offptr, align 4
|
||||
|
@ -36,26 +37,30 @@ define arm_aapcs_vfpcc <8 x i32> @ptr_v8i32(<8 x i32*>* %offptr) {
|
|||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0, #16]
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: vmov r5, s1
|
||||
; CHECK-NEXT: vmov r4, s7
|
||||
; CHECK-NEXT: vmov r5, s3
|
||||
; CHECK-NEXT: ldr.w r12, [r1]
|
||||
; CHECK-NEXT: vmov r1, s6
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: ldr.w lr, [r2]
|
||||
; CHECK-NEXT: vmov r2, s5
|
||||
; CHECK-NEXT: ldr r0, [r0]
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: ldr r3, [r3]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], lr, r12
|
||||
; CHECK-NEXT: ldr r4, [r4]
|
||||
; CHECK-NEXT: ldr r0, [r0]
|
||||
; CHECK-NEXT: ldr r5, [r5]
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r5, r3
|
||||
; CHECK-NEXT: vmov.32 q0[0], r3
|
||||
; CHECK-NEXT: vmov.32 q1[0], r0
|
||||
; CHECK-NEXT: ldr r4, [r4]
|
||||
; CHECK-NEXT: vmov.32 q0[1], r5
|
||||
; CHECK-NEXT: vmov.32 q0[2], r12
|
||||
; CHECK-NEXT: vmov.32 q0[3], lr
|
||||
; CHECK-NEXT: ldr r1, [r1]
|
||||
; CHECK-NEXT: ldr r2, [r2]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r4, r2
|
||||
; CHECK-NEXT: vmov.32 q1[1], r1
|
||||
; CHECK-NEXT: vmov.32 q1[2], r2
|
||||
; CHECK-NEXT: vmov.32 q1[3], r4
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
entry:
|
||||
%offs = load <8 x i32*>, <8 x i32*>* %offptr, align 4
|
||||
|
@ -66,53 +71,61 @@ entry:
|
|||
define arm_aapcs_vfpcc <16 x i32> @ptr_v16i32(<16 x i32*>* %offptr) {
|
||||
; CHECK-LABEL: ptr_v16i32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: .save {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0, #32]
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0, #48]
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
||||
; CHECK-NEXT: vldrw.u32 q3, [r0, #16]
|
||||
; CHECK-NEXT: vmov r1, s8
|
||||
; CHECK-NEXT: vmov r1, s10
|
||||
; CHECK-NEXT: vmov r5, s4
|
||||
; CHECK-NEXT: vmov r6, s6
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: vmov r7, s7
|
||||
; CHECK-NEXT: vmov r0, s12
|
||||
; CHECK-NEXT: vmov r4, s10
|
||||
; CHECK-NEXT: vmov r6, s7
|
||||
; CHECK-NEXT: vmov r4, s11
|
||||
; CHECK-NEXT: ldr.w r12, [r1]
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: ldr r5, [r5]
|
||||
; CHECK-NEXT: ldr r6, [r6]
|
||||
; CHECK-NEXT: ldr r2, [r2]
|
||||
; CHECK-NEXT: ldr r7, [r7]
|
||||
; CHECK-NEXT: ldr r0, [r0]
|
||||
; CHECK-NEXT: ldr r6, [r6]
|
||||
; CHECK-NEXT: ldr r4, [r4]
|
||||
; CHECK-NEXT: ldr.w lr, [r1]
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: ldr r3, [r1]
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r6, r5
|
||||
; CHECK-NEXT: vmov r6, s5
|
||||
; CHECK-NEXT: vmov r5, s9
|
||||
; CHECK-NEXT: ldr r3, [r1]
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov.32 q0[0], r5
|
||||
; CHECK-NEXT: vmov r5, s5
|
||||
; CHECK-NEXT: ldr r1, [r1]
|
||||
; CHECK-NEXT: ldr r6, [r6]
|
||||
; CHECK-NEXT: ldr r5, [r5]
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r7, r6
|
||||
; CHECK-NEXT: vmov r6, s14
|
||||
; CHECK-NEXT: ldr r6, [r6]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r6, r0
|
||||
; CHECK-NEXT: vmov.32 q0[1], r5
|
||||
; CHECK-NEXT: vmov r5, s6
|
||||
; CHECK-NEXT: vmov.32 q1[0], r0
|
||||
; CHECK-NEXT: vmov r0, s13
|
||||
; CHECK-NEXT: vmov r6, s15
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r3, lr
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r2, r1
|
||||
; CHECK-NEXT: ldr r5, [r5]
|
||||
; CHECK-NEXT: ldr r0, [r0]
|
||||
; CHECK-NEXT: ldr r6, [r6]
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r6, r0
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r4, r12
|
||||
; CHECK-NEXT: vmov.32 q0[2], r5
|
||||
; CHECK-NEXT: vmov r5, s8
|
||||
; CHECK-NEXT: vmov.32 q1[1], r0
|
||||
; CHECK-NEXT: vmov r0, s14
|
||||
; CHECK-NEXT: vmov.32 q0[3], r6
|
||||
; CHECK-NEXT: ldr r5, [r5]
|
||||
; CHECK-NEXT: ldr r0, [r0]
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r0, r5
|
||||
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
|
||||
; CHECK-NEXT: vmov.32 q1[2], r0
|
||||
; CHECK-NEXT: vmov r0, s15
|
||||
; CHECK-NEXT: vmov.32 q3[0], lr
|
||||
; CHECK-NEXT: vmov.32 q3[1], r3
|
||||
; CHECK-NEXT: vmov.32 q3[2], r1
|
||||
; CHECK-NEXT: vmov.32 q3[3], r2
|
||||
; CHECK-NEXT: ldr r0, [r0]
|
||||
; CHECK-NEXT: vmov.32 q1[3], r0
|
||||
; CHECK-NEXT: vmov r0, s9
|
||||
; CHECK-NEXT: vmov.32 q2[0], r5
|
||||
; CHECK-NEXT: ldr r0, [r0]
|
||||
; CHECK-NEXT: vmov.32 q2[1], r0
|
||||
; CHECK-NEXT: vmov.32 q2[2], r12
|
||||
; CHECK-NEXT: vmov.32 q2[3], r4
|
||||
; CHECK-NEXT: pop {r4, r5, r6, pc}
|
||||
entry:
|
||||
%offs = load <16 x i32*>, <16 x i32*>* %offptr, align 4
|
||||
%gather = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %offs, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i32> undef)
|
||||
|
@ -220,10 +233,12 @@ define arm_aapcs_vfpcc <2 x i32> @ptr_v2i16_sext(<2 x i16*>* %offptr) {
|
|||
; CHECK-NEXT: ldrd r1, r0, [r0]
|
||||
; CHECK-NEXT: ldrsh.w r0, [r0]
|
||||
; CHECK-NEXT: ldrsh.w r1, [r1]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
|
||||
; CHECK-NEXT: asrs r0, r0, #31
|
||||
; CHECK-NEXT: asrs r2, r0, #31
|
||||
; CHECK-NEXT: vmov.32 q0[0], r1
|
||||
; CHECK-NEXT: asrs r1, r1, #31
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r0, r1
|
||||
; CHECK-NEXT: vmov.32 q0[1], r1
|
||||
; CHECK-NEXT: vmov.32 q0[2], r0
|
||||
; CHECK-NEXT: vmov.32 q0[3], r2
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%offs = load <2 x i16*>, <2 x i16*>* %offptr, align 4
|
||||
|
@ -239,7 +254,8 @@ define arm_aapcs_vfpcc <2 x i32> @ptr_v2i16_zext(<2 x i16*>* %offptr) {
|
|||
; CHECK-NEXT: vmov.i64 q0, #0xffff
|
||||
; CHECK-NEXT: ldrh r0, [r0]
|
||||
; CHECK-NEXT: ldrh r1, [r1]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r0, r1
|
||||
; CHECK-NEXT: vmov.32 q1[0], r1
|
||||
; CHECK-NEXT: vmov.32 q1[2], r0
|
||||
; CHECK-NEXT: vand q0, q1, q0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
|
@ -253,16 +269,18 @@ define arm_aapcs_vfpcc <4 x i32> @ptr_v4i16_sext(<4 x i16*>* %offptr) {
|
|||
; CHECK-LABEL: ptr_v4i16_sext:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: ldrh r0, [r0]
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: ldrh r2, [r2]
|
||||
; CHECK-NEXT: ldrh r1, [r1]
|
||||
; CHECK-NEXT: ldrh r0, [r0]
|
||||
; CHECK-NEXT: vmov.32 q0[0], r2
|
||||
; CHECK-NEXT: ldrh r3, [r3]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, r0
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r1
|
||||
; CHECK-NEXT: vmov.32 q0[1], r0
|
||||
; CHECK-NEXT: ldrh r1, [r1]
|
||||
; CHECK-NEXT: vmov.32 q0[2], r3
|
||||
; CHECK-NEXT: vmov.32 q0[3], r1
|
||||
; CHECK-NEXT: vmovlb.s16 q0, q0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
|
@ -276,16 +294,18 @@ define arm_aapcs_vfpcc <4 x i32> @ptr_v4i16_zext(<4 x i16*>* %offptr) {
|
|||
; CHECK-LABEL: ptr_v4i16_zext:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: ldrh r0, [r0]
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: ldrh r2, [r2]
|
||||
; CHECK-NEXT: ldrh r1, [r1]
|
||||
; CHECK-NEXT: ldrh r0, [r0]
|
||||
; CHECK-NEXT: vmov.32 q0[0], r2
|
||||
; CHECK-NEXT: ldrh r3, [r3]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, r0
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r1
|
||||
; CHECK-NEXT: vmov.32 q0[1], r0
|
||||
; CHECK-NEXT: ldrh r1, [r1]
|
||||
; CHECK-NEXT: vmov.32 q0[2], r3
|
||||
; CHECK-NEXT: vmov.32 q0[3], r1
|
||||
; CHECK-NEXT: vmovlb.u16 q0, q0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
|
@ -302,27 +322,31 @@ define arm_aapcs_vfpcc <8 x i32> @ptr_v8i16_sext(<8 x i16*>* %offptr) {
|
|||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0, #16]
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: vmov r5, s1
|
||||
; CHECK-NEXT: vmov r4, s7
|
||||
; CHECK-NEXT: vmov r5, s3
|
||||
; CHECK-NEXT: ldrh.w r12, [r1]
|
||||
; CHECK-NEXT: vmov r1, s6
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: ldrh.w lr, [r2]
|
||||
; CHECK-NEXT: vmov r2, s5
|
||||
; CHECK-NEXT: ldrh r0, [r0]
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: ldrh r3, [r3]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], lr, r12
|
||||
; CHECK-NEXT: ldrh r4, [r4]
|
||||
; CHECK-NEXT: ldrh r0, [r0]
|
||||
; CHECK-NEXT: ldrh r5, [r5]
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r5, r3
|
||||
; CHECK-NEXT: vmov.32 q0[0], r3
|
||||
; CHECK-NEXT: vmov.32 q1[0], r0
|
||||
; CHECK-NEXT: ldrh r4, [r4]
|
||||
; CHECK-NEXT: vmov.32 q0[1], r5
|
||||
; CHECK-NEXT: vmov.32 q0[2], r12
|
||||
; CHECK-NEXT: vmov.32 q0[3], lr
|
||||
; CHECK-NEXT: vmovlb.s16 q0, q0
|
||||
; CHECK-NEXT: ldrh r1, [r1]
|
||||
; CHECK-NEXT: ldrh r2, [r2]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r4, r2
|
||||
; CHECK-NEXT: vmov.32 q1[1], r1
|
||||
; CHECK-NEXT: vmov.32 q1[2], r2
|
||||
; CHECK-NEXT: vmov.32 q1[3], r4
|
||||
; CHECK-NEXT: vmovlb.s16 q1, q1
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
entry:
|
||||
|
@ -339,27 +363,31 @@ define arm_aapcs_vfpcc <8 x i32> @ptr_v8i16_zext(<8 x i16*>* %offptr) {
|
|||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0, #16]
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: vmov r5, s1
|
||||
; CHECK-NEXT: vmov r4, s7
|
||||
; CHECK-NEXT: vmov r5, s3
|
||||
; CHECK-NEXT: ldrh.w r12, [r1]
|
||||
; CHECK-NEXT: vmov r1, s6
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: ldrh.w lr, [r2]
|
||||
; CHECK-NEXT: vmov r2, s5
|
||||
; CHECK-NEXT: ldrh r0, [r0]
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: ldrh r3, [r3]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], lr, r12
|
||||
; CHECK-NEXT: ldrh r4, [r4]
|
||||
; CHECK-NEXT: ldrh r0, [r0]
|
||||
; CHECK-NEXT: ldrh r5, [r5]
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r5, r3
|
||||
; CHECK-NEXT: vmov.32 q0[0], r3
|
||||
; CHECK-NEXT: vmov.32 q1[0], r0
|
||||
; CHECK-NEXT: ldrh r4, [r4]
|
||||
; CHECK-NEXT: vmov.32 q0[1], r5
|
||||
; CHECK-NEXT: vmov.32 q0[2], r12
|
||||
; CHECK-NEXT: vmov.32 q0[3], lr
|
||||
; CHECK-NEXT: vmovlb.u16 q0, q0
|
||||
; CHECK-NEXT: ldrh r1, [r1]
|
||||
; CHECK-NEXT: ldrh r2, [r2]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r4, r2
|
||||
; CHECK-NEXT: vmov.32 q1[1], r1
|
||||
; CHECK-NEXT: vmov.32 q1[2], r2
|
||||
; CHECK-NEXT: vmov.32 q1[3], r4
|
||||
; CHECK-NEXT: vmovlb.u16 q1, q1
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
entry:
|
||||
|
@ -566,15 +594,17 @@ define arm_aapcs_vfpcc <4 x i32> @ptr_v4i8_sext32(<4 x i8*>* %offptr) {
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: ldrb r0, [r0]
|
||||
; CHECK-NEXT: ldrb r1, [r1]
|
||||
; CHECK-NEXT: ldrb r2, [r2]
|
||||
; CHECK-NEXT: ldrb r3, [r3]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
|
||||
; CHECK-NEXT: vmov.32 q0[0], r0
|
||||
; CHECK-NEXT: ldrb r1, [r1]
|
||||
; CHECK-NEXT: vmov.32 q0[1], r3
|
||||
; CHECK-NEXT: ldrb r2, [r2]
|
||||
; CHECK-NEXT: vmov.32 q0[2], r1
|
||||
; CHECK-NEXT: vmov.32 q0[3], r2
|
||||
; CHECK-NEXT: vmovlb.s8 q0, q0
|
||||
; CHECK-NEXT: vmovlb.s16 q0, q0
|
||||
; CHECK-NEXT: bx lr
|
||||
|
@ -590,16 +620,18 @@ define arm_aapcs_vfpcc <4 x i32> @ptr_v4i8_zext32(<4 x i8*>* %offptr) {
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vmov.i32 q1, #0xff
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: ldrb r1, [r1]
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: ldrb r2, [r2]
|
||||
; CHECK-NEXT: ldrb r0, [r0]
|
||||
; CHECK-NEXT: ldrb r1, [r1]
|
||||
; CHECK-NEXT: vmov.32 q0[0], r2
|
||||
; CHECK-NEXT: ldrb r3, [r3]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, r1
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r0
|
||||
; CHECK-NEXT: vmov.32 q0[1], r1
|
||||
; CHECK-NEXT: ldrb r0, [r0]
|
||||
; CHECK-NEXT: vmov.32 q0[2], r3
|
||||
; CHECK-NEXT: vmov.32 q0[3], r0
|
||||
; CHECK-NEXT: vand q0, q0, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
|
@ -616,28 +648,32 @@ define arm_aapcs_vfpcc <8 x i32> @ptr_v8i8_sext32(<8 x i8*>* %offptr) {
|
|||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0, #16]
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: vmov r5, s1
|
||||
; CHECK-NEXT: vmov r4, s7
|
||||
; CHECK-NEXT: vmov r5, s3
|
||||
; CHECK-NEXT: ldrb.w r12, [r1]
|
||||
; CHECK-NEXT: vmov r1, s6
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: ldrb.w lr, [r2]
|
||||
; CHECK-NEXT: vmov r2, s5
|
||||
; CHECK-NEXT: ldrb r0, [r0]
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: ldrb r3, [r3]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], lr, r12
|
||||
; CHECK-NEXT: ldrb r4, [r4]
|
||||
; CHECK-NEXT: ldrb r0, [r0]
|
||||
; CHECK-NEXT: ldrb r5, [r5]
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r5, r3
|
||||
; CHECK-NEXT: vmov.32 q0[0], r3
|
||||
; CHECK-NEXT: vmov.32 q1[0], r0
|
||||
; CHECK-NEXT: ldrb r4, [r4]
|
||||
; CHECK-NEXT: vmov.32 q0[1], r5
|
||||
; CHECK-NEXT: vmov.32 q0[2], r12
|
||||
; CHECK-NEXT: vmov.32 q0[3], lr
|
||||
; CHECK-NEXT: vmovlb.s8 q0, q0
|
||||
; CHECK-NEXT: vmovlb.s16 q0, q0
|
||||
; CHECK-NEXT: ldrb r1, [r1]
|
||||
; CHECK-NEXT: ldrb r2, [r2]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r4, r2
|
||||
; CHECK-NEXT: vmov.32 q1[1], r1
|
||||
; CHECK-NEXT: vmov.32 q1[2], r2
|
||||
; CHECK-NEXT: vmov.32 q1[3], r4
|
||||
; CHECK-NEXT: vmovlb.s8 q1, q1
|
||||
; CHECK-NEXT: vmovlb.s16 q1, q1
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
|
@ -655,28 +691,32 @@ define arm_aapcs_vfpcc <8 x i32> @ptr_v8i8_zext32(<8 x i8*>* %offptr) {
|
|||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0, #16]
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: vmov r4, s1
|
||||
; CHECK-NEXT: vmov r5, s3
|
||||
; CHECK-NEXT: vmov r4, s0
|
||||
; CHECK-NEXT: vmov r3, s5
|
||||
; CHECK-NEXT: vmov r5, s1
|
||||
; CHECK-NEXT: ldrb.w r12, [r1]
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: ldrb.w lr, [r0]
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: ldrb r2, [r2]
|
||||
; CHECK-NEXT: vmov r1, s6
|
||||
; CHECK-NEXT: ldrb.w lr, [r2]
|
||||
; CHECK-NEXT: vmov r2, s7
|
||||
; CHECK-NEXT: ldrb r0, [r0]
|
||||
; CHECK-NEXT: vmov.i32 q1, #0xff
|
||||
; CHECK-NEXT: ldrb r3, [r3]
|
||||
; CHECK-NEXT: ldrb r4, [r4]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, r12
|
||||
; CHECK-NEXT: ldrb r3, [r3]
|
||||
; CHECK-NEXT: vmov.32 q2[0], r0
|
||||
; CHECK-NEXT: ldrb r5, [r5]
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r3, lr
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r5, r4
|
||||
; CHECK-NEXT: vmov.32 q0[0], r4
|
||||
; CHECK-NEXT: vmov.32 q2[1], r3
|
||||
; CHECK-NEXT: vmov.32 q0[1], r5
|
||||
; CHECK-NEXT: vmov.32 q0[2], r12
|
||||
; CHECK-NEXT: vmov.32 q0[3], lr
|
||||
; CHECK-NEXT: vand q0, q0, q1
|
||||
; CHECK-NEXT: ldrb r1, [r1]
|
||||
; CHECK-NEXT: ldrb r0, [r0]
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r0, r1
|
||||
; CHECK-NEXT: ldrb r2, [r2]
|
||||
; CHECK-NEXT: vmov.32 q2[2], r1
|
||||
; CHECK-NEXT: vmov.32 q2[3], r2
|
||||
; CHECK-NEXT: vand q1, q2, q1
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
entry:
|
||||
|
@ -803,30 +843,34 @@ define arm_aapcs_vfpcc <8 x i32> @sext_unsigned_unscaled_i8_i8_toi64(i8* %base,
|
|||
; CHECK-NEXT: vldrb.u32 q1, [r1, #4]
|
||||
; CHECK-NEXT: vadd.i32 q0, q0, r0
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r1, s6
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: vmov r5, s1
|
||||
; CHECK-NEXT: vmov r4, s7
|
||||
; CHECK-NEXT: vmov r5, s3
|
||||
; CHECK-NEXT: ldrb.w r12, [r2]
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: ldrb.w lr, [r3]
|
||||
; CHECK-NEXT: vmov r3, s5
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: ldrb r0, [r0]
|
||||
; CHECK-NEXT: ldrb r1, [r1]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], lr, r12
|
||||
; CHECK-NEXT: ldrb r4, [r4]
|
||||
; CHECK-NEXT: ldrb r5, [r5]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q1[0], r0
|
||||
; CHECK-NEXT: ldrb r4, [r4]
|
||||
; CHECK-NEXT: vmov.32 q1[1], r1
|
||||
; CHECK-NEXT: ldrb r2, [r2]
|
||||
; CHECK-NEXT: ldrb r3, [r3]
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r5, r2
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r4, r3
|
||||
; CHECK-NEXT: vmovlb.s8 q0, q0
|
||||
; CHECK-NEXT: vmov.32 q0[0], r2
|
||||
; CHECK-NEXT: vmov.32 q0[1], r5
|
||||
; CHECK-NEXT: vmov.32 q1[2], r3
|
||||
; CHECK-NEXT: vmov.32 q0[2], r12
|
||||
; CHECK-NEXT: vmov.32 q1[3], r4
|
||||
; CHECK-NEXT: vmov.32 q0[3], lr
|
||||
; CHECK-NEXT: vmovlb.s8 q1, q1
|
||||
; CHECK-NEXT: vmovlb.s16 q0, q0
|
||||
; CHECK-NEXT: vmovlb.s8 q0, q0
|
||||
; CHECK-NEXT: vmovlb.s16 q1, q1
|
||||
; CHECK-NEXT: vmovlb.s16 q0, q0
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
entry:
|
||||
%offs = load <8 x i8>, <8 x i8>* %offptr, align 1
|
||||
|
|
|
@ -8,15 +8,17 @@ define arm_aapcs_vfpcc <4 x i32> @unscaled_i32_i32_gather(i8* %base, <4 x i32>*
|
|||
; NOGATSCAT-NEXT: vldrw.u32 q0, [r1]
|
||||
; NOGATSCAT-NEXT: vadd.i32 q0, q0, r0
|
||||
; NOGATSCAT-NEXT: vmov r0, s0
|
||||
; NOGATSCAT-NEXT: vmov r3, s1
|
||||
; NOGATSCAT-NEXT: vmov r1, s2
|
||||
; NOGATSCAT-NEXT: vmov r2, s1
|
||||
; NOGATSCAT-NEXT: vmov r3, s3
|
||||
; NOGATSCAT-NEXT: vmov r2, s3
|
||||
; NOGATSCAT-NEXT: ldr r0, [r0]
|
||||
; NOGATSCAT-NEXT: ldr r1, [r1]
|
||||
; NOGATSCAT-NEXT: ldr r2, [r2]
|
||||
; NOGATSCAT-NEXT: ldr r3, [r3]
|
||||
; NOGATSCAT-NEXT: vmov q0[2], q0[0], r1, r0
|
||||
; NOGATSCAT-NEXT: vmov q0[3], q0[1], r3, r2
|
||||
; NOGATSCAT-NEXT: vmov.32 q0[0], r0
|
||||
; NOGATSCAT-NEXT: ldr r1, [r1]
|
||||
; NOGATSCAT-NEXT: vmov.32 q0[1], r3
|
||||
; NOGATSCAT-NEXT: ldr r2, [r2]
|
||||
; NOGATSCAT-NEXT: vmov.32 q0[2], r1
|
||||
; NOGATSCAT-NEXT: vmov.32 q0[3], r2
|
||||
; NOGATSCAT-NEXT: bx lr
|
||||
;
|
||||
; NOMVE-LABEL: unscaled_i32_i32_gather:
|
||||
|
|
|
@ -93,61 +93,64 @@ define void @foo_sext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32>
|
|||
; CHECK-LE-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-LE-NEXT: .pad #4
|
||||
; CHECK-LE-NEXT: sub sp, #4
|
||||
; CHECK-LE-NEXT: ldrd lr, r5, [r1]
|
||||
; CHECK-LE-NEXT: movs r3, #0
|
||||
; CHECK-LE-NEXT: @ implicit-def: $q0
|
||||
; CHECK-LE-NEXT: rsbs.w r1, lr, #0
|
||||
; CHECK-LE-NEXT: vmov q1[2], q1[0], r5, lr
|
||||
; CHECK-LE-NEXT: sbcs.w r1, r3, lr, asr #31
|
||||
; CHECK-LE-NEXT: mov.w r1, #0
|
||||
; CHECK-LE-NEXT: ldrd lr, r12, [r1]
|
||||
; CHECK-LE-NEXT: movs r1, #0
|
||||
; CHECK-LE-NEXT: @ implicit-def: $q1
|
||||
; CHECK-LE-NEXT: movs r4, #0
|
||||
; CHECK-LE-NEXT: rsbs.w r3, lr, #0
|
||||
; CHECK-LE-NEXT: vmov.32 q0[0], lr
|
||||
; CHECK-LE-NEXT: sbcs.w r3, r1, lr, asr #31
|
||||
; CHECK-LE-NEXT: mov.w lr, #0
|
||||
; CHECK-LE-NEXT: it lt
|
||||
; CHECK-LE-NEXT: movlt.w lr, #1
|
||||
; CHECK-LE-NEXT: rsbs.w r3, r12, #0
|
||||
; CHECK-LE-NEXT: sbcs.w r3, r1, r12, asr #31
|
||||
; CHECK-LE-NEXT: it lt
|
||||
; CHECK-LE-NEXT: movlt r1, #1
|
||||
; CHECK-LE-NEXT: rsbs r4, r5, #0
|
||||
; CHECK-LE-NEXT: sbcs.w r4, r3, r5, asr #31
|
||||
; CHECK-LE-NEXT: it lt
|
||||
; CHECK-LE-NEXT: movlt r3, #1
|
||||
; CHECK-LE-NEXT: cmp r3, #0
|
||||
; CHECK-LE-NEXT: cmp r1, #0
|
||||
; CHECK-LE-NEXT: it ne
|
||||
; CHECK-LE-NEXT: mvnne r3, #1
|
||||
; CHECK-LE-NEXT: bfi r3, r1, #0, #1
|
||||
; CHECK-LE-NEXT: vmov r4, s4
|
||||
; CHECK-LE-NEXT: and r12, r3, #3
|
||||
; CHECK-LE-NEXT: lsls r1, r3, #31
|
||||
; CHECK-LE-NEXT: mvnne r1, #1
|
||||
; CHECK-LE-NEXT: bfi r1, lr, #0, #1
|
||||
; CHECK-LE-NEXT: vmov.32 q0[2], r12
|
||||
; CHECK-LE-NEXT: and r3, r1, #3
|
||||
; CHECK-LE-NEXT: lsls r1, r1, #31
|
||||
; CHECK-LE-NEXT: itt ne
|
||||
; CHECK-LE-NEXT: ldrne r1, [r2]
|
||||
; CHECK-LE-NEXT: vmovne.32 q0[0], r1
|
||||
; CHECK-LE-NEXT: lsls.w r1, r12, #30
|
||||
; CHECK-LE-NEXT: vmovne.32 q1[0], r1
|
||||
; CHECK-LE-NEXT: lsls r1, r3, #30
|
||||
; CHECK-LE-NEXT: itt mi
|
||||
; CHECK-LE-NEXT: ldrmi r1, [r2, #4]
|
||||
; CHECK-LE-NEXT: vmovmi.32 q0[2], r1
|
||||
; CHECK-LE-NEXT: vmov r3, s2
|
||||
; CHECK-LE-NEXT: movs r2, #0
|
||||
; CHECK-LE-NEXT: vmov r1, s0
|
||||
; CHECK-LE-NEXT: vmov q0[2], q0[0], r3, r1
|
||||
; CHECK-LE-NEXT: rsbs r5, r4, #0
|
||||
; CHECK-LE-NEXT: vmovmi.32 q1[2], r1
|
||||
; CHECK-LE-NEXT: vmov r2, s0
|
||||
; CHECK-LE-NEXT: vmov r3, s4
|
||||
; CHECK-LE-NEXT: vmov r1, s6
|
||||
; CHECK-LE-NEXT: vmov.32 q1[0], r3
|
||||
; CHECK-LE-NEXT: rsbs r5, r2, #0
|
||||
; CHECK-LE-NEXT: sbcs.w r2, r4, r2, asr #31
|
||||
; CHECK-LE-NEXT: vmov r2, s2
|
||||
; CHECK-LE-NEXT: asr.w lr, r3, #31
|
||||
; CHECK-LE-NEXT: vmov r3, s6
|
||||
; CHECK-LE-NEXT: vmov.32 q1[1], lr
|
||||
; CHECK-LE-NEXT: asr.w r12, r1, #31
|
||||
; CHECK-LE-NEXT: sbcs.w r1, r2, r4, asr #31
|
||||
; CHECK-LE-NEXT: vmov.32 q1[2], r1
|
||||
; CHECK-LE-NEXT: mov.w r1, #0
|
||||
; CHECK-LE-NEXT: vmov q0[3], q0[1], lr, r12
|
||||
; CHECK-LE-NEXT: it lt
|
||||
; CHECK-LE-NEXT: movlt r1, #1
|
||||
; CHECK-LE-NEXT: rsbs r5, r3, #0
|
||||
; CHECK-LE-NEXT: sbcs.w r3, r2, r3, asr #31
|
||||
; CHECK-LE-NEXT: vmov.32 q1[3], r12
|
||||
; CHECK-LE-NEXT: rsbs r3, r2, #0
|
||||
; CHECK-LE-NEXT: sbcs.w r2, r4, r2, asr #31
|
||||
; CHECK-LE-NEXT: it lt
|
||||
; CHECK-LE-NEXT: movlt r2, #1
|
||||
; CHECK-LE-NEXT: cmp r2, #0
|
||||
; CHECK-LE-NEXT: movlt r4, #1
|
||||
; CHECK-LE-NEXT: cmp r4, #0
|
||||
; CHECK-LE-NEXT: it ne
|
||||
; CHECK-LE-NEXT: mvnne r2, #1
|
||||
; CHECK-LE-NEXT: bfi r2, r1, #0, #1
|
||||
; CHECK-LE-NEXT: and r1, r2, #3
|
||||
; CHECK-LE-NEXT: lsls r2, r2, #31
|
||||
; CHECK-LE-NEXT: mvnne r4, #1
|
||||
; CHECK-LE-NEXT: bfi r4, r1, #0, #1
|
||||
; CHECK-LE-NEXT: and r1, r4, #3
|
||||
; CHECK-LE-NEXT: lsls r2, r4, #31
|
||||
; CHECK-LE-NEXT: it ne
|
||||
; CHECK-LE-NEXT: vstrne d0, [r0]
|
||||
; CHECK-LE-NEXT: vstrne d2, [r0]
|
||||
; CHECK-LE-NEXT: lsls r1, r1, #30
|
||||
; CHECK-LE-NEXT: it mi
|
||||
; CHECK-LE-NEXT: vstrmi d1, [r0, #8]
|
||||
; CHECK-LE-NEXT: vstrmi d3, [r0, #8]
|
||||
; CHECK-LE-NEXT: add sp, #4
|
||||
; CHECK-LE-NEXT: pop {r4, r5, r7, pc}
|
||||
;
|
||||
|
@ -161,7 +164,9 @@ define void @foo_sext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32>
|
|||
; CHECK-BE-NEXT: rsbs.w r1, lr, #0
|
||||
; CHECK-BE-NEXT: mov.w r3, #0
|
||||
; CHECK-BE-NEXT: sbcs.w r1, r3, lr, asr #31
|
||||
; CHECK-BE-NEXT: vmov q0[3], q0[1], lr, r12
|
||||
; CHECK-BE-NEXT: vmov.32 q0[1], r12
|
||||
; CHECK-BE-NEXT: @ implicit-def: $q2
|
||||
; CHECK-BE-NEXT: vmov.32 q0[3], lr
|
||||
; CHECK-BE-NEXT: mov.w lr, #0
|
||||
; CHECK-BE-NEXT: it lt
|
||||
; CHECK-BE-NEXT: movlt.w lr, #1
|
||||
|
@ -173,7 +178,6 @@ define void @foo_sext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32>
|
|||
; CHECK-BE-NEXT: it ne
|
||||
; CHECK-BE-NEXT: mvnne r3, #1
|
||||
; CHECK-BE-NEXT: bfi r3, lr, #0, #1
|
||||
; CHECK-BE-NEXT: @ implicit-def: $q2
|
||||
; CHECK-BE-NEXT: and r1, r3, #3
|
||||
; CHECK-BE-NEXT: lsls r3, r3, #31
|
||||
; CHECK-BE-NEXT: beq .LBB5_2
|
||||
|
@ -195,15 +199,17 @@ define void @foo_sext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32>
|
|||
; CHECK-BE-NEXT: vrev64.32 q2, q1
|
||||
; CHECK-BE-NEXT: vmov r2, s11
|
||||
; CHECK-BE-NEXT: movs r4, #0
|
||||
; CHECK-BE-NEXT: vmov r1, s1
|
||||
; CHECK-BE-NEXT: vmov r3, s3
|
||||
; CHECK-BE-NEXT: vmov r3, s1
|
||||
; CHECK-BE-NEXT: vmov r1, s3
|
||||
; CHECK-BE-NEXT: rsbs r5, r2, #0
|
||||
; CHECK-BE-NEXT: sbcs.w r2, r4, r2, asr #31
|
||||
; CHECK-BE-NEXT: vmov r2, s9
|
||||
; CHECK-BE-NEXT: asr.w r12, r1, #31
|
||||
; CHECK-BE-NEXT: asr.w lr, r3, #31
|
||||
; CHECK-BE-NEXT: vmov q1[2], q1[0], lr, r12
|
||||
; CHECK-BE-NEXT: vmov q1[3], q1[1], r3, r1
|
||||
; CHECK-BE-NEXT: vmov.32 q1[0], lr
|
||||
; CHECK-BE-NEXT: asr.w r12, r1, #31
|
||||
; CHECK-BE-NEXT: vmov.32 q1[1], r3
|
||||
; CHECK-BE-NEXT: vmov.32 q1[2], r12
|
||||
; CHECK-BE-NEXT: vmov.32 q1[3], r1
|
||||
; CHECK-BE-NEXT: mov.w r1, #0
|
||||
; CHECK-BE-NEXT: it lt
|
||||
; CHECK-BE-NEXT: movlt r1, #1
|
||||
|
@ -241,62 +247,65 @@ define void @foo_sext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask,
|
|||
; CHECK-LE-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-LE-NEXT: .pad #4
|
||||
; CHECK-LE-NEXT: sub sp, #4
|
||||
; CHECK-LE-NEXT: ldrd lr, r5, [r1]
|
||||
; CHECK-LE-NEXT: movs r3, #0
|
||||
; CHECK-LE-NEXT: @ implicit-def: $q0
|
||||
; CHECK-LE-NEXT: rsbs.w r1, lr, #0
|
||||
; CHECK-LE-NEXT: vmov q1[2], q1[0], r5, lr
|
||||
; CHECK-LE-NEXT: sbcs.w r1, r3, lr, asr #31
|
||||
; CHECK-LE-NEXT: mov.w r1, #0
|
||||
; CHECK-LE-NEXT: ldrd lr, r12, [r1]
|
||||
; CHECK-LE-NEXT: movs r1, #0
|
||||
; CHECK-LE-NEXT: @ implicit-def: $q1
|
||||
; CHECK-LE-NEXT: movs r4, #0
|
||||
; CHECK-LE-NEXT: rsbs.w r3, lr, #0
|
||||
; CHECK-LE-NEXT: vmov.32 q0[0], lr
|
||||
; CHECK-LE-NEXT: sbcs.w r3, r1, lr, asr #31
|
||||
; CHECK-LE-NEXT: mov.w lr, #0
|
||||
; CHECK-LE-NEXT: it lt
|
||||
; CHECK-LE-NEXT: movlt.w lr, #1
|
||||
; CHECK-LE-NEXT: rsbs.w r3, r12, #0
|
||||
; CHECK-LE-NEXT: sbcs.w r3, r1, r12, asr #31
|
||||
; CHECK-LE-NEXT: it lt
|
||||
; CHECK-LE-NEXT: movlt r1, #1
|
||||
; CHECK-LE-NEXT: rsbs r4, r5, #0
|
||||
; CHECK-LE-NEXT: sbcs.w r4, r3, r5, asr #31
|
||||
; CHECK-LE-NEXT: it lt
|
||||
; CHECK-LE-NEXT: movlt r3, #1
|
||||
; CHECK-LE-NEXT: cmp r3, #0
|
||||
; CHECK-LE-NEXT: cmp r1, #0
|
||||
; CHECK-LE-NEXT: it ne
|
||||
; CHECK-LE-NEXT: mvnne r3, #1
|
||||
; CHECK-LE-NEXT: bfi r3, r1, #0, #1
|
||||
; CHECK-LE-NEXT: vmov r4, s4
|
||||
; CHECK-LE-NEXT: and r12, r3, #3
|
||||
; CHECK-LE-NEXT: lsls r1, r3, #31
|
||||
; CHECK-LE-NEXT: mvnne r1, #1
|
||||
; CHECK-LE-NEXT: bfi r1, lr, #0, #1
|
||||
; CHECK-LE-NEXT: vmov.32 q0[2], r12
|
||||
; CHECK-LE-NEXT: and r3, r1, #3
|
||||
; CHECK-LE-NEXT: lsls r1, r1, #31
|
||||
; CHECK-LE-NEXT: itt ne
|
||||
; CHECK-LE-NEXT: ldrne r1, [r2]
|
||||
; CHECK-LE-NEXT: vmovne.32 q0[0], r1
|
||||
; CHECK-LE-NEXT: lsls.w r1, r12, #30
|
||||
; CHECK-LE-NEXT: vmovne.32 q1[0], r1
|
||||
; CHECK-LE-NEXT: lsls r1, r3, #30
|
||||
; CHECK-LE-NEXT: itt mi
|
||||
; CHECK-LE-NEXT: ldrmi r1, [r2, #4]
|
||||
; CHECK-LE-NEXT: vmovmi.32 q0[2], r1
|
||||
; CHECK-LE-NEXT: vmov r3, s2
|
||||
; CHECK-LE-NEXT: movs r2, #0
|
||||
; CHECK-LE-NEXT: vmov r1, s0
|
||||
; CHECK-LE-NEXT: vmov q0[2], q0[0], r3, r1
|
||||
; CHECK-LE-NEXT: rsbs r5, r4, #0
|
||||
; CHECK-LE-NEXT: vmovmi.32 q1[2], r1
|
||||
; CHECK-LE-NEXT: vmov r2, s0
|
||||
; CHECK-LE-NEXT: vmov r3, s4
|
||||
; CHECK-LE-NEXT: vmov r1, s6
|
||||
; CHECK-LE-NEXT: vmov.32 q1[0], r3
|
||||
; CHECK-LE-NEXT: rsbs r5, r2, #0
|
||||
; CHECK-LE-NEXT: sbcs.w r2, r4, r2, asr #31
|
||||
; CHECK-LE-NEXT: vmov r2, s2
|
||||
; CHECK-LE-NEXT: asr.w lr, r3, #31
|
||||
; CHECK-LE-NEXT: vmov r3, s6
|
||||
; CHECK-LE-NEXT: vmov.32 q1[1], lr
|
||||
; CHECK-LE-NEXT: asr.w r12, r1, #31
|
||||
; CHECK-LE-NEXT: sbcs.w r1, r2, r4, asr #31
|
||||
; CHECK-LE-NEXT: vmov.32 q1[2], r1
|
||||
; CHECK-LE-NEXT: mov.w r1, #0
|
||||
; CHECK-LE-NEXT: vmov q0[3], q0[1], lr, r12
|
||||
; CHECK-LE-NEXT: it lt
|
||||
; CHECK-LE-NEXT: movlt r1, #1
|
||||
; CHECK-LE-NEXT: rsbs r5, r3, #0
|
||||
; CHECK-LE-NEXT: sbcs.w r3, r2, r3, asr #31
|
||||
; CHECK-LE-NEXT: vmov.32 q1[3], r12
|
||||
; CHECK-LE-NEXT: rsbs r3, r2, #0
|
||||
; CHECK-LE-NEXT: sbcs.w r2, r4, r2, asr #31
|
||||
; CHECK-LE-NEXT: it lt
|
||||
; CHECK-LE-NEXT: movlt r2, #1
|
||||
; CHECK-LE-NEXT: cmp r2, #0
|
||||
; CHECK-LE-NEXT: movlt r4, #1
|
||||
; CHECK-LE-NEXT: cmp r4, #0
|
||||
; CHECK-LE-NEXT: it ne
|
||||
; CHECK-LE-NEXT: mvnne r2, #1
|
||||
; CHECK-LE-NEXT: bfi r2, r1, #0, #1
|
||||
; CHECK-LE-NEXT: and r1, r2, #3
|
||||
; CHECK-LE-NEXT: lsls r2, r2, #31
|
||||
; CHECK-LE-NEXT: mvnne r4, #1
|
||||
; CHECK-LE-NEXT: bfi r4, r1, #0, #1
|
||||
; CHECK-LE-NEXT: and r1, r4, #3
|
||||
; CHECK-LE-NEXT: lsls r2, r4, #31
|
||||
; CHECK-LE-NEXT: itt ne
|
||||
; CHECK-LE-NEXT: vmovne r2, r3, d0
|
||||
; CHECK-LE-NEXT: vmovne r2, r3, d2
|
||||
; CHECK-LE-NEXT: strdne r2, r3, [r0]
|
||||
; CHECK-LE-NEXT: lsls r1, r1, #30
|
||||
; CHECK-LE-NEXT: itt mi
|
||||
; CHECK-LE-NEXT: vmovmi r1, r2, d1
|
||||
; CHECK-LE-NEXT: vmovmi r1, r2, d3
|
||||
; CHECK-LE-NEXT: strdmi r1, r2, [r0, #8]
|
||||
; CHECK-LE-NEXT: add sp, #4
|
||||
; CHECK-LE-NEXT: pop {r4, r5, r7, pc}
|
||||
|
@ -311,7 +320,9 @@ define void @foo_sext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask,
|
|||
; CHECK-BE-NEXT: rsbs.w r1, lr, #0
|
||||
; CHECK-BE-NEXT: mov.w r3, #0
|
||||
; CHECK-BE-NEXT: sbcs.w r1, r3, lr, asr #31
|
||||
; CHECK-BE-NEXT: vmov q0[3], q0[1], lr, r12
|
||||
; CHECK-BE-NEXT: vmov.32 q0[1], r12
|
||||
; CHECK-BE-NEXT: @ implicit-def: $q2
|
||||
; CHECK-BE-NEXT: vmov.32 q0[3], lr
|
||||
; CHECK-BE-NEXT: mov.w lr, #0
|
||||
; CHECK-BE-NEXT: it lt
|
||||
; CHECK-BE-NEXT: movlt.w lr, #1
|
||||
|
@ -323,7 +334,6 @@ define void @foo_sext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask,
|
|||
; CHECK-BE-NEXT: it ne
|
||||
; CHECK-BE-NEXT: mvnne r3, #1
|
||||
; CHECK-BE-NEXT: bfi r3, lr, #0, #1
|
||||
; CHECK-BE-NEXT: @ implicit-def: $q2
|
||||
; CHECK-BE-NEXT: and r1, r3, #3
|
||||
; CHECK-BE-NEXT: lsls r3, r3, #31
|
||||
; CHECK-BE-NEXT: beq .LBB6_2
|
||||
|
@ -345,15 +355,17 @@ define void @foo_sext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask,
|
|||
; CHECK-BE-NEXT: vrev64.32 q2, q1
|
||||
; CHECK-BE-NEXT: vmov r2, s11
|
||||
; CHECK-BE-NEXT: movs r4, #0
|
||||
; CHECK-BE-NEXT: vmov r1, s1
|
||||
; CHECK-BE-NEXT: vmov r3, s3
|
||||
; CHECK-BE-NEXT: vmov r3, s1
|
||||
; CHECK-BE-NEXT: vmov r1, s3
|
||||
; CHECK-BE-NEXT: rsbs r5, r2, #0
|
||||
; CHECK-BE-NEXT: sbcs.w r2, r4, r2, asr #31
|
||||
; CHECK-BE-NEXT: vmov r2, s9
|
||||
; CHECK-BE-NEXT: asr.w r12, r1, #31
|
||||
; CHECK-BE-NEXT: asr.w lr, r3, #31
|
||||
; CHECK-BE-NEXT: vmov q1[2], q1[0], lr, r12
|
||||
; CHECK-BE-NEXT: vmov q1[3], q1[1], r3, r1
|
||||
; CHECK-BE-NEXT: vmov.32 q1[0], lr
|
||||
; CHECK-BE-NEXT: asr.w r12, r1, #31
|
||||
; CHECK-BE-NEXT: vmov.32 q1[1], r3
|
||||
; CHECK-BE-NEXT: vmov.32 q1[2], r12
|
||||
; CHECK-BE-NEXT: vmov.32 q1[3], r1
|
||||
; CHECK-BE-NEXT: mov.w r1, #0
|
||||
; CHECK-BE-NEXT: it lt
|
||||
; CHECK-BE-NEXT: movlt r1, #1
|
||||
|
@ -389,63 +401,64 @@ entry:
|
|||
define void @foo_zext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32> *%src) {
|
||||
; CHECK-LE-LABEL: foo_zext_v2i64_v2i32:
|
||||
; CHECK-LE: @ %bb.0: @ %entry
|
||||
; CHECK-LE-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-LE-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-LE-NEXT: .save {r7, lr}
|
||||
; CHECK-LE-NEXT: push {r7, lr}
|
||||
; CHECK-LE-NEXT: .pad #4
|
||||
; CHECK-LE-NEXT: sub sp, #4
|
||||
; CHECK-LE-NEXT: ldrd lr, r5, [r1]
|
||||
; CHECK-LE-NEXT: movs r3, #0
|
||||
; CHECK-LE-NEXT: @ implicit-def: $q0
|
||||
; CHECK-LE-NEXT: ldrd lr, r12, [r1]
|
||||
; CHECK-LE-NEXT: movs r1, #0
|
||||
; CHECK-LE-NEXT: @ implicit-def: $q1
|
||||
; CHECK-LE-NEXT: vmov.i64 q2, #0xffffffff
|
||||
; CHECK-LE-NEXT: rsbs.w r1, lr, #0
|
||||
; CHECK-LE-NEXT: vmov q1[2], q1[0], r5, lr
|
||||
; CHECK-LE-NEXT: sbcs.w r1, r3, lr, asr #31
|
||||
; CHECK-LE-NEXT: mov.w r1, #0
|
||||
; CHECK-LE-NEXT: rsbs.w r3, lr, #0
|
||||
; CHECK-LE-NEXT: vmov.32 q0[0], lr
|
||||
; CHECK-LE-NEXT: sbcs.w r3, r1, lr, asr #31
|
||||
; CHECK-LE-NEXT: mov.w lr, #0
|
||||
; CHECK-LE-NEXT: it lt
|
||||
; CHECK-LE-NEXT: movlt.w lr, #1
|
||||
; CHECK-LE-NEXT: rsbs.w r3, r12, #0
|
||||
; CHECK-LE-NEXT: sbcs.w r3, r1, r12, asr #31
|
||||
; CHECK-LE-NEXT: it lt
|
||||
; CHECK-LE-NEXT: movlt r1, #1
|
||||
; CHECK-LE-NEXT: rsbs r4, r5, #0
|
||||
; CHECK-LE-NEXT: sbcs.w r4, r3, r5, asr #31
|
||||
; CHECK-LE-NEXT: it lt
|
||||
; CHECK-LE-NEXT: movlt r3, #1
|
||||
; CHECK-LE-NEXT: cmp r3, #0
|
||||
; CHECK-LE-NEXT: cmp r1, #0
|
||||
; CHECK-LE-NEXT: it ne
|
||||
; CHECK-LE-NEXT: mvnne r3, #1
|
||||
; CHECK-LE-NEXT: bfi r3, r1, #0, #1
|
||||
; CHECK-LE-NEXT: and r12, r3, #3
|
||||
; CHECK-LE-NEXT: lsls r1, r3, #31
|
||||
; CHECK-LE-NEXT: mvnne r1, #1
|
||||
; CHECK-LE-NEXT: bfi r1, lr, #0, #1
|
||||
; CHECK-LE-NEXT: vmov.32 q0[2], r12
|
||||
; CHECK-LE-NEXT: and r3, r1, #3
|
||||
; CHECK-LE-NEXT: mov.w r12, #0
|
||||
; CHECK-LE-NEXT: lsls r1, r1, #31
|
||||
; CHECK-LE-NEXT: itt ne
|
||||
; CHECK-LE-NEXT: ldrne r1, [r2]
|
||||
; CHECK-LE-NEXT: vmovne.32 q0[0], r1
|
||||
; CHECK-LE-NEXT: lsls.w r1, r12, #30
|
||||
; CHECK-LE-NEXT: vmovne.32 q1[0], r1
|
||||
; CHECK-LE-NEXT: lsls r1, r3, #30
|
||||
; CHECK-LE-NEXT: itt mi
|
||||
; CHECK-LE-NEXT: ldrmi r1, [r2, #4]
|
||||
; CHECK-LE-NEXT: vmovmi.32 q0[2], r1
|
||||
; CHECK-LE-NEXT: vmov r1, s4
|
||||
; CHECK-LE-NEXT: vmovmi.32 q1[2], r1
|
||||
; CHECK-LE-NEXT: vmov r1, s0
|
||||
; CHECK-LE-NEXT: movs r2, #0
|
||||
; CHECK-LE-NEXT: vand q0, q0, q2
|
||||
; CHECK-LE-NEXT: vand q1, q1, q2
|
||||
; CHECK-LE-NEXT: rsbs r3, r1, #0
|
||||
; CHECK-LE-NEXT: vmov r3, s6
|
||||
; CHECK-LE-NEXT: vmov r3, s2
|
||||
; CHECK-LE-NEXT: sbcs.w r1, r2, r1, asr #31
|
||||
; CHECK-LE-NEXT: mov.w r1, #0
|
||||
; CHECK-LE-NEXT: it lt
|
||||
; CHECK-LE-NEXT: movlt r1, #1
|
||||
; CHECK-LE-NEXT: rsbs r5, r3, #0
|
||||
; CHECK-LE-NEXT: sbcs.w r3, r2, r3, asr #31
|
||||
; CHECK-LE-NEXT: movlt.w r12, #1
|
||||
; CHECK-LE-NEXT: rsbs r1, r3, #0
|
||||
; CHECK-LE-NEXT: sbcs.w r1, r2, r3, asr #31
|
||||
; CHECK-LE-NEXT: it lt
|
||||
; CHECK-LE-NEXT: movlt r2, #1
|
||||
; CHECK-LE-NEXT: cmp r2, #0
|
||||
; CHECK-LE-NEXT: it ne
|
||||
; CHECK-LE-NEXT: mvnne r2, #1
|
||||
; CHECK-LE-NEXT: bfi r2, r1, #0, #1
|
||||
; CHECK-LE-NEXT: bfi r2, r12, #0, #1
|
||||
; CHECK-LE-NEXT: and r1, r2, #3
|
||||
; CHECK-LE-NEXT: lsls r2, r2, #31
|
||||
; CHECK-LE-NEXT: it ne
|
||||
; CHECK-LE-NEXT: vstrne d0, [r0]
|
||||
; CHECK-LE-NEXT: vstrne d2, [r0]
|
||||
; CHECK-LE-NEXT: lsls r1, r1, #30
|
||||
; CHECK-LE-NEXT: it mi
|
||||
; CHECK-LE-NEXT: vstrmi d1, [r0, #8]
|
||||
; CHECK-LE-NEXT: vstrmi d3, [r0, #8]
|
||||
; CHECK-LE-NEXT: add sp, #4
|
||||
; CHECK-LE-NEXT: pop {r4, r5, r7, pc}
|
||||
; CHECK-LE-NEXT: pop {r7, pc}
|
||||
;
|
||||
; CHECK-BE-LABEL: foo_zext_v2i64_v2i32:
|
||||
; CHECK-BE: @ %bb.0: @ %entry
|
||||
|
@ -457,7 +470,9 @@ define void @foo_zext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32>
|
|||
; CHECK-BE-NEXT: rsbs.w r1, lr, #0
|
||||
; CHECK-BE-NEXT: mov.w r3, #0
|
||||
; CHECK-BE-NEXT: sbcs.w r1, r3, lr, asr #31
|
||||
; CHECK-BE-NEXT: vmov q0[3], q0[1], lr, r12
|
||||
; CHECK-BE-NEXT: vmov.32 q0[1], r12
|
||||
; CHECK-BE-NEXT: @ implicit-def: $q1
|
||||
; CHECK-BE-NEXT: vmov.32 q0[3], lr
|
||||
; CHECK-BE-NEXT: mov.w lr, #0
|
||||
; CHECK-BE-NEXT: it lt
|
||||
; CHECK-BE-NEXT: movlt.w lr, #1
|
||||
|
@ -469,7 +484,6 @@ define void @foo_zext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32>
|
|||
; CHECK-BE-NEXT: it ne
|
||||
; CHECK-BE-NEXT: mvnne r3, #1
|
||||
; CHECK-BE-NEXT: bfi r3, lr, #0, #1
|
||||
; CHECK-BE-NEXT: @ implicit-def: $q1
|
||||
; CHECK-BE-NEXT: and r1, r3, #3
|
||||
; CHECK-BE-NEXT: lsls r3, r3, #31
|
||||
; CHECK-BE-NEXT: beq .LBB7_2
|
||||
|
@ -527,65 +541,66 @@ entry:
|
|||
define void @foo_zext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32> *%src) {
|
||||
; CHECK-LE-LABEL: foo_zext_v2i64_v2i32_unaligned:
|
||||
; CHECK-LE: @ %bb.0: @ %entry
|
||||
; CHECK-LE-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-LE-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-LE-NEXT: .save {r7, lr}
|
||||
; CHECK-LE-NEXT: push {r7, lr}
|
||||
; CHECK-LE-NEXT: .pad #4
|
||||
; CHECK-LE-NEXT: sub sp, #4
|
||||
; CHECK-LE-NEXT: ldrd lr, r5, [r1]
|
||||
; CHECK-LE-NEXT: movs r3, #0
|
||||
; CHECK-LE-NEXT: @ implicit-def: $q0
|
||||
; CHECK-LE-NEXT: ldrd lr, r12, [r1]
|
||||
; CHECK-LE-NEXT: movs r1, #0
|
||||
; CHECK-LE-NEXT: @ implicit-def: $q1
|
||||
; CHECK-LE-NEXT: vmov.i64 q2, #0xffffffff
|
||||
; CHECK-LE-NEXT: rsbs.w r1, lr, #0
|
||||
; CHECK-LE-NEXT: vmov q1[2], q1[0], r5, lr
|
||||
; CHECK-LE-NEXT: sbcs.w r1, r3, lr, asr #31
|
||||
; CHECK-LE-NEXT: mov.w r1, #0
|
||||
; CHECK-LE-NEXT: rsbs.w r3, lr, #0
|
||||
; CHECK-LE-NEXT: vmov.32 q0[0], lr
|
||||
; CHECK-LE-NEXT: sbcs.w r3, r1, lr, asr #31
|
||||
; CHECK-LE-NEXT: mov.w lr, #0
|
||||
; CHECK-LE-NEXT: it lt
|
||||
; CHECK-LE-NEXT: movlt.w lr, #1
|
||||
; CHECK-LE-NEXT: rsbs.w r3, r12, #0
|
||||
; CHECK-LE-NEXT: sbcs.w r3, r1, r12, asr #31
|
||||
; CHECK-LE-NEXT: it lt
|
||||
; CHECK-LE-NEXT: movlt r1, #1
|
||||
; CHECK-LE-NEXT: rsbs r4, r5, #0
|
||||
; CHECK-LE-NEXT: sbcs.w r4, r3, r5, asr #31
|
||||
; CHECK-LE-NEXT: it lt
|
||||
; CHECK-LE-NEXT: movlt r3, #1
|
||||
; CHECK-LE-NEXT: cmp r3, #0
|
||||
; CHECK-LE-NEXT: cmp r1, #0
|
||||
; CHECK-LE-NEXT: it ne
|
||||
; CHECK-LE-NEXT: mvnne r3, #1
|
||||
; CHECK-LE-NEXT: bfi r3, r1, #0, #1
|
||||
; CHECK-LE-NEXT: and r12, r3, #3
|
||||
; CHECK-LE-NEXT: lsls r1, r3, #31
|
||||
; CHECK-LE-NEXT: mvnne r1, #1
|
||||
; CHECK-LE-NEXT: bfi r1, lr, #0, #1
|
||||
; CHECK-LE-NEXT: vmov.32 q0[2], r12
|
||||
; CHECK-LE-NEXT: and r3, r1, #3
|
||||
; CHECK-LE-NEXT: mov.w r12, #0
|
||||
; CHECK-LE-NEXT: lsls r1, r1, #31
|
||||
; CHECK-LE-NEXT: itt ne
|
||||
; CHECK-LE-NEXT: ldrne r1, [r2]
|
||||
; CHECK-LE-NEXT: vmovne.32 q0[0], r1
|
||||
; CHECK-LE-NEXT: lsls.w r1, r12, #30
|
||||
; CHECK-LE-NEXT: vmovne.32 q1[0], r1
|
||||
; CHECK-LE-NEXT: lsls r1, r3, #30
|
||||
; CHECK-LE-NEXT: itt mi
|
||||
; CHECK-LE-NEXT: ldrmi r1, [r2, #4]
|
||||
; CHECK-LE-NEXT: vmovmi.32 q0[2], r1
|
||||
; CHECK-LE-NEXT: vmov r1, s4
|
||||
; CHECK-LE-NEXT: vmovmi.32 q1[2], r1
|
||||
; CHECK-LE-NEXT: vmov r1, s0
|
||||
; CHECK-LE-NEXT: movs r2, #0
|
||||
; CHECK-LE-NEXT: vand q0, q0, q2
|
||||
; CHECK-LE-NEXT: vand q1, q1, q2
|
||||
; CHECK-LE-NEXT: rsbs r3, r1, #0
|
||||
; CHECK-LE-NEXT: vmov r3, s6
|
||||
; CHECK-LE-NEXT: vmov r3, s2
|
||||
; CHECK-LE-NEXT: sbcs.w r1, r2, r1, asr #31
|
||||
; CHECK-LE-NEXT: mov.w r1, #0
|
||||
; CHECK-LE-NEXT: it lt
|
||||
; CHECK-LE-NEXT: movlt r1, #1
|
||||
; CHECK-LE-NEXT: rsbs r5, r3, #0
|
||||
; CHECK-LE-NEXT: sbcs.w r3, r2, r3, asr #31
|
||||
; CHECK-LE-NEXT: movlt.w r12, #1
|
||||
; CHECK-LE-NEXT: rsbs r1, r3, #0
|
||||
; CHECK-LE-NEXT: sbcs.w r1, r2, r3, asr #31
|
||||
; CHECK-LE-NEXT: it lt
|
||||
; CHECK-LE-NEXT: movlt r2, #1
|
||||
; CHECK-LE-NEXT: cmp r2, #0
|
||||
; CHECK-LE-NEXT: it ne
|
||||
; CHECK-LE-NEXT: mvnne r2, #1
|
||||
; CHECK-LE-NEXT: bfi r2, r1, #0, #1
|
||||
; CHECK-LE-NEXT: bfi r2, r12, #0, #1
|
||||
; CHECK-LE-NEXT: and r1, r2, #3
|
||||
; CHECK-LE-NEXT: lsls r2, r2, #31
|
||||
; CHECK-LE-NEXT: itt ne
|
||||
; CHECK-LE-NEXT: vmovne r2, r3, d0
|
||||
; CHECK-LE-NEXT: vmovne r2, r3, d2
|
||||
; CHECK-LE-NEXT: strdne r2, r3, [r0]
|
||||
; CHECK-LE-NEXT: lsls r1, r1, #30
|
||||
; CHECK-LE-NEXT: itt mi
|
||||
; CHECK-LE-NEXT: vmovmi r1, r2, d1
|
||||
; CHECK-LE-NEXT: vmovmi r1, r2, d3
|
||||
; CHECK-LE-NEXT: strdmi r1, r2, [r0, #8]
|
||||
; CHECK-LE-NEXT: add sp, #4
|
||||
; CHECK-LE-NEXT: pop {r4, r5, r7, pc}
|
||||
; CHECK-LE-NEXT: pop {r7, pc}
|
||||
;
|
||||
; CHECK-BE-LABEL: foo_zext_v2i64_v2i32_unaligned:
|
||||
; CHECK-BE: @ %bb.0: @ %entry
|
||||
|
@ -597,7 +612,9 @@ define void @foo_zext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask,
|
|||
; CHECK-BE-NEXT: rsbs.w r1, lr, #0
|
||||
; CHECK-BE-NEXT: mov.w r3, #0
|
||||
; CHECK-BE-NEXT: sbcs.w r1, r3, lr, asr #31
|
||||
; CHECK-BE-NEXT: vmov q0[3], q0[1], lr, r12
|
||||
; CHECK-BE-NEXT: vmov.32 q0[1], r12
|
||||
; CHECK-BE-NEXT: @ implicit-def: $q1
|
||||
; CHECK-BE-NEXT: vmov.32 q0[3], lr
|
||||
; CHECK-BE-NEXT: mov.w lr, #0
|
||||
; CHECK-BE-NEXT: it lt
|
||||
; CHECK-BE-NEXT: movlt.w lr, #1
|
||||
|
@ -609,7 +626,6 @@ define void @foo_zext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask,
|
|||
; CHECK-BE-NEXT: it ne
|
||||
; CHECK-BE-NEXT: mvnne r3, #1
|
||||
; CHECK-BE-NEXT: bfi r3, lr, #0, #1
|
||||
; CHECK-BE-NEXT: @ implicit-def: $q1
|
||||
; CHECK-BE-NEXT: and r1, r3, #3
|
||||
; CHECK-BE-NEXT: lsls r3, r3, #31
|
||||
; CHECK-BE-NEXT: beq .LBB8_2
|
||||
|
|
|
@ -40,17 +40,17 @@ define arm_aapcs_vfpcc <2 x i64> @smin_v2i64(<2 x i64> %s1, <2 x i64> %s2) {
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: vmov r2, s4
|
||||
; CHECK-NEXT: movs r0, #0
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r12, s5
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov lr, s3
|
||||
; CHECK-NEXT: subs r2, r3, r2
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: sbcs.w r1, r1, r12
|
||||
; CHECK-NEXT: movs r0, #0
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: vmov r12, s7
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov lr, s1
|
||||
; CHECK-NEXT: subs r2, r3, r2
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r2, s4
|
||||
; CHECK-NEXT: sbcs.w r1, r1, r12
|
||||
; CHECK-NEXT: vmov r12, s5
|
||||
; CHECK-NEXT: mov.w r1, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r1, #1
|
||||
|
@ -62,8 +62,10 @@ define arm_aapcs_vfpcc <2 x i64> @smin_v2i64(<2 x i64> %s1, <2 x i64> %s2) {
|
|||
; CHECK-NEXT: movlt r0, #1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r0, r1
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r0, r1
|
||||
; CHECK-NEXT: vmov.32 q2[0], r0
|
||||
; CHECK-NEXT: vmov.32 q2[1], r0
|
||||
; CHECK-NEXT: vmov.32 q2[2], r1
|
||||
; CHECK-NEXT: vmov.32 q2[3], r1
|
||||
; CHECK-NEXT: vbic q1, q1, q2
|
||||
; CHECK-NEXT: vand q0, q0, q2
|
||||
; CHECK-NEXT: vorr q0, q0, q1
|
||||
|
@ -112,17 +114,17 @@ define arm_aapcs_vfpcc <2 x i64> @umin_v2i64(<2 x i64> %s1, <2 x i64> %s2) {
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: vmov r2, s4
|
||||
; CHECK-NEXT: movs r0, #0
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r12, s5
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov lr, s3
|
||||
; CHECK-NEXT: subs r2, r3, r2
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: sbcs.w r1, r1, r12
|
||||
; CHECK-NEXT: movs r0, #0
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: vmov r12, s7
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov lr, s1
|
||||
; CHECK-NEXT: subs r2, r3, r2
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r2, s4
|
||||
; CHECK-NEXT: sbcs.w r1, r1, r12
|
||||
; CHECK-NEXT: vmov r12, s5
|
||||
; CHECK-NEXT: mov.w r1, #0
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r1, #1
|
||||
|
@ -134,8 +136,10 @@ define arm_aapcs_vfpcc <2 x i64> @umin_v2i64(<2 x i64> %s1, <2 x i64> %s2) {
|
|||
; CHECK-NEXT: movlo r0, #1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r0, r1
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r0, r1
|
||||
; CHECK-NEXT: vmov.32 q2[0], r0
|
||||
; CHECK-NEXT: vmov.32 q2[1], r0
|
||||
; CHECK-NEXT: vmov.32 q2[2], r1
|
||||
; CHECK-NEXT: vmov.32 q2[3], r1
|
||||
; CHECK-NEXT: vbic q1, q1, q2
|
||||
; CHECK-NEXT: vand q0, q0, q2
|
||||
; CHECK-NEXT: vorr q0, q0, q1
|
||||
|
@ -185,17 +189,17 @@ define arm_aapcs_vfpcc <2 x i64> @smax_v2i64(<2 x i64> %s1, <2 x i64> %s2) {
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: movs r0, #0
|
||||
; CHECK-NEXT: vmov r3, s4
|
||||
; CHECK-NEXT: vmov r12, s1
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: vmov lr, s7
|
||||
; CHECK-NEXT: subs r2, r3, r2
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: sbcs.w r1, r1, r12
|
||||
; CHECK-NEXT: movs r0, #0
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: vmov r12, s3
|
||||
; CHECK-NEXT: vmov r1, s7
|
||||
; CHECK-NEXT: vmov lr, s5
|
||||
; CHECK-NEXT: subs r2, r3, r2
|
||||
; CHECK-NEXT: vmov r3, s4
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: sbcs.w r1, r1, r12
|
||||
; CHECK-NEXT: vmov r12, s1
|
||||
; CHECK-NEXT: mov.w r1, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r1, #1
|
||||
|
@ -207,8 +211,10 @@ define arm_aapcs_vfpcc <2 x i64> @smax_v2i64(<2 x i64> %s1, <2 x i64> %s2) {
|
|||
; CHECK-NEXT: movlt r0, #1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r0, r1
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r0, r1
|
||||
; CHECK-NEXT: vmov.32 q2[0], r0
|
||||
; CHECK-NEXT: vmov.32 q2[1], r0
|
||||
; CHECK-NEXT: vmov.32 q2[2], r1
|
||||
; CHECK-NEXT: vmov.32 q2[3], r1
|
||||
; CHECK-NEXT: vbic q1, q1, q2
|
||||
; CHECK-NEXT: vand q0, q0, q2
|
||||
; CHECK-NEXT: vorr q0, q0, q1
|
||||
|
@ -257,17 +263,17 @@ define arm_aapcs_vfpcc <2 x i64> @umax_v2i64(<2 x i64> %s1, <2 x i64> %s2) {
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: movs r0, #0
|
||||
; CHECK-NEXT: vmov r3, s4
|
||||
; CHECK-NEXT: vmov r12, s1
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: vmov lr, s7
|
||||
; CHECK-NEXT: subs r2, r3, r2
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: sbcs.w r1, r1, r12
|
||||
; CHECK-NEXT: movs r0, #0
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: vmov r12, s3
|
||||
; CHECK-NEXT: vmov r1, s7
|
||||
; CHECK-NEXT: vmov lr, s5
|
||||
; CHECK-NEXT: subs r2, r3, r2
|
||||
; CHECK-NEXT: vmov r3, s4
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: sbcs.w r1, r1, r12
|
||||
; CHECK-NEXT: vmov r12, s1
|
||||
; CHECK-NEXT: mov.w r1, #0
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r1, #1
|
||||
|
@ -279,8 +285,10 @@ define arm_aapcs_vfpcc <2 x i64> @umax_v2i64(<2 x i64> %s1, <2 x i64> %s2) {
|
|||
; CHECK-NEXT: movlo r0, #1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r0, r1
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r0, r1
|
||||
; CHECK-NEXT: vmov.32 q2[0], r0
|
||||
; CHECK-NEXT: vmov.32 q2[1], r0
|
||||
; CHECK-NEXT: vmov.32 q2[2], r1
|
||||
; CHECK-NEXT: vmov.32 q2[3], r1
|
||||
; CHECK-NEXT: vbic q1, q1, q2
|
||||
; CHECK-NEXT: vand q0, q0, q2
|
||||
; CHECK-NEXT: vorr q0, q0, q1
|
||||
|
@ -369,12 +377,12 @@ define arm_aapcs_vfpcc <2 x double> @maxnm_float64_t(<2 x double> %src1, <2 x do
|
|||
; CHECK-NEXT: vpush {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vmov q4, q1
|
||||
; CHECK-NEXT: vmov q5, q0
|
||||
; CHECK-NEXT: vmov r0, r1, d8
|
||||
; CHECK-NEXT: vmov r2, r3, d10
|
||||
; CHECK-NEXT: bl __aeabi_dcmpgt
|
||||
; CHECK-NEXT: vmov r12, r1, d9
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: vmov r0, r1, d9
|
||||
; CHECK-NEXT: vmov r2, r3, d11
|
||||
; CHECK-NEXT: bl __aeabi_dcmpgt
|
||||
; CHECK-NEXT: vmov r12, r1, d8
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: vmov r2, r3, d10
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: movne r0, #1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
|
@ -386,8 +394,10 @@ define arm_aapcs_vfpcc <2 x double> @maxnm_float64_t(<2 x double> %src1, <2 x do
|
|||
; CHECK-NEXT: movne r0, #1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, r4
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r0, r4
|
||||
; CHECK-NEXT: vmov.32 q0[0], r0
|
||||
; CHECK-NEXT: vmov.32 q0[1], r0
|
||||
; CHECK-NEXT: vmov.32 q0[2], r4
|
||||
; CHECK-NEXT: vmov.32 q0[3], r4
|
||||
; CHECK-NEXT: vbic q1, q5, q0
|
||||
; CHECK-NEXT: vand q0, q4, q0
|
||||
; CHECK-NEXT: vorr q0, q0, q1
|
||||
|
|
|
@ -34,17 +34,19 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @neg_v2i64(<2 x i64> %s1) {
|
||||
; CHECK-LABEL: neg_v2i64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: sbc.w r0, r12, r0
|
||||
; CHECK-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-NEXT: sbc.w r3, r12, r3
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, r1
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r0
|
||||
; CHECK-NEXT: vmov.32 q0[0], r2
|
||||
; CHECK-NEXT: vmov.32 q0[1], r3
|
||||
; CHECK-NEXT: vmov.32 q0[2], r1
|
||||
; CHECK-NEXT: vmov.32 q0[3], r0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = sub nsw <2 x i64> zeroinitializer, %s1
|
||||
|
|
|
@ -149,60 +149,67 @@ define dso_local i32 @e() #0 {
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
|
||||
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
|
||||
; CHECK-NEXT: .pad #392
|
||||
; CHECK-NEXT: sub sp, #392
|
||||
; CHECK-NEXT: movw r10, :lower16:.L_MergedGlobals
|
||||
; CHECK-NEXT: movw r9, :lower16:.L_MergedGlobals
|
||||
; CHECK-NEXT: vldr s0, .LCPI1_0
|
||||
; CHECK-NEXT: movt r10, :upper16:.L_MergedGlobals
|
||||
; CHECK-NEXT: movt r9, :upper16:.L_MergedGlobals
|
||||
; CHECK-NEXT: vldr s3, .LCPI1_1
|
||||
; CHECK-NEXT: mov r6, r10
|
||||
; CHECK-NEXT: mov r7, r10
|
||||
; CHECK-NEXT: ldr r1, [r6, #4]!
|
||||
; CHECK-NEXT: movw r5, :lower16:e
|
||||
; CHECK-NEXT: ldr r0, [r7, #8]!
|
||||
; CHECK-NEXT: vmov r4, s3
|
||||
; CHECK-NEXT: vmov s1, r6
|
||||
; CHECK-NEXT: movt r5, :upper16:e
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r7, r7
|
||||
; CHECK-NEXT: vmov s9, r5
|
||||
; CHECK-NEXT: vdup.32 q4, r6
|
||||
; CHECK-NEXT: mov r5, r9
|
||||
; CHECK-NEXT: mov r7, r9
|
||||
; CHECK-NEXT: ldr r1, [r5, #8]!
|
||||
; CHECK-NEXT: vmov r6, s3
|
||||
; CHECK-NEXT: ldr r0, [r7, #4]!
|
||||
; CHECK-NEXT: movw r4, :lower16:e
|
||||
; CHECK-NEXT: vmov.32 q4[0], r5
|
||||
; CHECK-NEXT: movt r4, :upper16:e
|
||||
; CHECK-NEXT: vmov q1, q4
|
||||
; CHECK-NEXT: vmov s1, r7
|
||||
; CHECK-NEXT: vmov.32 q1[1], r6
|
||||
; CHECK-NEXT: vmov.32 q5[0], r7
|
||||
; CHECK-NEXT: vmov.32 q1[2], r5
|
||||
; CHECK-NEXT: vmov s9, r4
|
||||
; CHECK-NEXT: vmov.32 q1[3], r4
|
||||
; CHECK-NEXT: vdup.32 q6, r7
|
||||
; CHECK-NEXT: vstrw.32 q1, [sp, #76]
|
||||
; CHECK-NEXT: vmov q1, q5
|
||||
; CHECK-NEXT: vmov.32 q1[1], r7
|
||||
; CHECK-NEXT: vmov.f32 s2, s1
|
||||
; CHECK-NEXT: vmov.f32 s8, s0
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r5, r4
|
||||
; CHECK-NEXT: vmov q3, q4
|
||||
; CHECK-NEXT: vmov q5, q4
|
||||
; CHECK-NEXT: vmov.32 q1[2], r6
|
||||
; CHECK-NEXT: vmov q3, q6
|
||||
; CHECK-NEXT: vmov q7, q6
|
||||
; CHECK-NEXT: vmov.f32 s10, s1
|
||||
; CHECK-NEXT: vstrw.32 q1, [sp, #76]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r4, r6
|
||||
; CHECK-NEXT: mov.w r8, #4
|
||||
; CHECK-NEXT: mov.w r9, #0
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r5, r6
|
||||
; CHECK-NEXT: vmov.32 q3[0], r5
|
||||
; CHECK-NEXT: vmov.32 q5[1], r5
|
||||
; CHECK-NEXT: vmov.f32 s11, s3
|
||||
; CHECK-NEXT: str r1, [sp, #24]
|
||||
; CHECK-NEXT: mov.w r10, #0
|
||||
; CHECK-NEXT: vmov.32 q1[3], r4
|
||||
; CHECK-NEXT: vmov.32 q3[0], r4
|
||||
; CHECK-NEXT: vmov.32 q7[1], r4
|
||||
; CHECK-NEXT: str r1, [r0]
|
||||
; CHECK-NEXT: vmov.f32 s11, s3
|
||||
; CHECK-NEXT: movs r1, #64
|
||||
; CHECK-NEXT: strh.w r8, [sp, #390]
|
||||
; CHECK-NEXT: strd r0, r10, [sp, #24]
|
||||
; CHECK-NEXT: vstrw.32 q0, [sp, #44]
|
||||
; CHECK-NEXT: str.w r9, [sp, #28]
|
||||
; CHECK-NEXT: vstrw.32 q2, [r0]
|
||||
; CHECK-NEXT: str r0, [r0]
|
||||
; CHECK-NEXT: vstrw.32 q5, [r0]
|
||||
; CHECK-NEXT: vstrw.32 q2, [r0]
|
||||
; CHECK-NEXT: vstrw.32 q7, [r0]
|
||||
; CHECK-NEXT: vstrw.32 q3, [r0]
|
||||
; CHECK-NEXT: vstrw.32 q1, [r0]
|
||||
; CHECK-NEXT: bl __aeabi_memclr4
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r6, r7
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r6, r6
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r7, r5
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r4, r7
|
||||
; CHECK-NEXT: vmov.32 q4[0], r9
|
||||
; CHECK-NEXT: vstrw.32 q0, [r0]
|
||||
; CHECK-NEXT: str.w r9, [r10]
|
||||
; CHECK-NEXT: vmov.32 q5[1], r5
|
||||
; CHECK-NEXT: vmov.32 q4[1], r4
|
||||
; CHECK-NEXT: vmov.32 q5[2], r7
|
||||
; CHECK-NEXT: vmov.32 q4[2], r7
|
||||
; CHECK-NEXT: vmov.32 q5[3], r6
|
||||
; CHECK-NEXT: vmov.32 q6[0], r10
|
||||
; CHECK-NEXT: vmov.32 q4[3], r5
|
||||
; CHECK-NEXT: str.w r10, [r9]
|
||||
; CHECK-NEXT: vstrw.32 q4, [r0]
|
||||
; CHECK-NEXT: vstrw.32 q1, [r0]
|
||||
; CHECK-NEXT: vstrw.32 q6, [r0]
|
||||
; CHECK-NEXT: vstrw.32 q5, [r0]
|
||||
; CHECK-NEXT: str.w r8, [sp, #308]
|
||||
; CHECK-NEXT: .LBB1_1: @ %for.cond
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
|
|
|
@ -577,20 +577,22 @@ define arm_aapcs_vfpcc <2 x i64> @cmpeqz_v2i1(<2 x i64> %a, <2 x i64> %b) {
|
|||
; CHECK-NEXT: vorr q2, q0, q1
|
||||
; CHECK-NEXT: vmov r0, s9
|
||||
; CHECK-NEXT: vmov r1, s8
|
||||
; CHECK-NEXT: vmov r2, s10
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s11
|
||||
; CHECK-NEXT: vmov r1, s10
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: cset r1, eq
|
||||
; CHECK-NEXT: tst.w r1, #1
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
|
||||
; CHECK-NEXT: vbic q1, q1, q2
|
||||
; CHECK-NEXT: vand q0, q0, q2
|
||||
; CHECK-NEXT: vmov.32 q3[0], r0
|
||||
; CHECK-NEXT: vmov.32 q3[1], r0
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q3[2], r0
|
||||
; CHECK-NEXT: vmov.32 q3[3], r0
|
||||
; CHECK-NEXT: vbic q1, q1, q3
|
||||
; CHECK-NEXT: vand q0, q0, q3
|
||||
; CHECK-NEXT: vorr q0, q0, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
|
@ -607,40 +609,44 @@ define arm_aapcs_vfpcc <2 x i64> @cmpeq_v2i1(<2 x i64> %a, <2 x i64> %b, <2 x i6
|
|||
; CHECK-NEXT: vmov r0, s9
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: vmov r2, s4
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: eors r0, r1
|
||||
; CHECK-NEXT: vmov r1, s8
|
||||
; CHECK-NEXT: eors r1, r2
|
||||
; CHECK-NEXT: vmov r2, s7
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s11
|
||||
; CHECK-NEXT: vmov r1, s7
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q3[0], r0
|
||||
; CHECK-NEXT: vmov.32 q3[1], r0
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: eors r0, r1
|
||||
; CHECK-NEXT: vmov r1, s10
|
||||
; CHECK-NEXT: eors r1, r2
|
||||
; CHECK-NEXT: vmov r2, s10
|
||||
; CHECK-NEXT: eors r2, r3
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: cset r1, eq
|
||||
; CHECK-NEXT: tst.w r1, #1
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: cset r1, eq
|
||||
; CHECK-NEXT: tst.w r1, #1
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r1, r0
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r1, r0
|
||||
; CHECK-NEXT: vand q2, q3, q2
|
||||
; CHECK-NEXT: vmov.32 q3[2], r0
|
||||
; CHECK-NEXT: vmov.32 q3[3], r0
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q2[0], r0
|
||||
; CHECK-NEXT: vmov.32 q2[1], r0
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q2[2], r0
|
||||
; CHECK-NEXT: vmov.32 q2[3], r0
|
||||
; CHECK-NEXT: vand q2, q2, q3
|
||||
; CHECK-NEXT: vbic q1, q1, q2
|
||||
; CHECK-NEXT: vand q0, q0, q2
|
||||
; CHECK-NEXT: vorr q0, q0, q1
|
||||
|
@ -661,33 +667,37 @@ define arm_aapcs_vfpcc <2 x i64> @cmpeqr_v2i1(<2 x i64> %a, <2 x i64> %b, i64 %c
|
|||
; CHECK-NEXT: eors r2, r1
|
||||
; CHECK-NEXT: eors r3, r0
|
||||
; CHECK-NEXT: orrs r2, r3
|
||||
; CHECK-NEXT: vmov r3, s7
|
||||
; CHECK-NEXT: cset r2, eq
|
||||
; CHECK-NEXT: tst.w r2, #1
|
||||
; CHECK-NEXT: csetm r2, ne
|
||||
; CHECK-NEXT: eors r1, r3
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: eors r0, r3
|
||||
; CHECK-NEXT: vmov.32 q2[0], r2
|
||||
; CHECK-NEXT: vmov.32 q2[1], r2
|
||||
; CHECK-NEXT: vmov r2, s7
|
||||
; CHECK-NEXT: eors r1, r2
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: eors r0, r2
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r0, r2
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r0, r2
|
||||
; CHECK-NEXT: vmov.32 q2[2], r0
|
||||
; CHECK-NEXT: vmov.32 q2[3], r0
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: cset r1, eq
|
||||
; CHECK-NEXT: tst.w r1, #1
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r1, r0
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q3[0], r0
|
||||
; CHECK-NEXT: vmov.32 q3[1], r0
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q3[2], r0
|
||||
; CHECK-NEXT: vmov.32 q3[3], r0
|
||||
; CHECK-NEXT: vand q2, q3, q2
|
||||
; CHECK-NEXT: vbic q1, q1, q2
|
||||
; CHECK-NEXT: vand q0, q0, q2
|
||||
|
|
|
@ -13,12 +13,14 @@ define arm_aapcs_vfpcc <4 x i32> @bitcast_to_v4i1(i4 %b, <4 x i32> %a) {
|
|||
; CHECK-LE-NEXT: vmsr p0, r0
|
||||
; CHECK-LE-NEXT: vpsel q1, q2, q1
|
||||
; CHECK-LE-NEXT: vmov.u8 r0, q1[0]
|
||||
; CHECK-LE-NEXT: vmov.u8 r1, q1[2]
|
||||
; CHECK-LE-NEXT: vmov q2[2], q2[0], r1, r0
|
||||
; CHECK-LE-NEXT: vmov.32 q2[0], r0
|
||||
; CHECK-LE-NEXT: vmov.u8 r0, q1[1]
|
||||
; CHECK-LE-NEXT: vmov.u8 r1, q1[3]
|
||||
; CHECK-LE-NEXT: vmov.32 q2[1], r0
|
||||
; CHECK-LE-NEXT: vmov.u8 r0, q1[2]
|
||||
; CHECK-LE-NEXT: vmov.32 q2[2], r0
|
||||
; CHECK-LE-NEXT: vmov.u8 r0, q1[3]
|
||||
; CHECK-LE-NEXT: vmov.32 q2[3], r0
|
||||
; CHECK-LE-NEXT: vmov.i32 q1, #0x0
|
||||
; CHECK-LE-NEXT: vmov q2[3], q2[1], r1, r0
|
||||
; CHECK-LE-NEXT: vcmp.i32 ne, q2, zr
|
||||
; CHECK-LE-NEXT: vpsel q0, q0, q1
|
||||
; CHECK-LE-NEXT: add sp, #4
|
||||
|
@ -34,14 +36,16 @@ define arm_aapcs_vfpcc <4 x i32> @bitcast_to_v4i1(i4 %b, <4 x i32> %a) {
|
|||
; CHECK-BE-NEXT: vmsr p0, r0
|
||||
; CHECK-BE-NEXT: vpsel q1, q2, q1
|
||||
; CHECK-BE-NEXT: vmov.u8 r0, q1[0]
|
||||
; CHECK-BE-NEXT: vmov.u8 r1, q1[2]
|
||||
; CHECK-BE-NEXT: vmov q2[2], q2[0], r1, r0
|
||||
; CHECK-BE-NEXT: vmov.32 q2[0], r0
|
||||
; CHECK-BE-NEXT: vmov.u8 r0, q1[1]
|
||||
; CHECK-BE-NEXT: vmov.u8 r1, q1[3]
|
||||
; CHECK-BE-NEXT: vmov.32 q2[1], r0
|
||||
; CHECK-BE-NEXT: vmov.u8 r0, q1[2]
|
||||
; CHECK-BE-NEXT: vmov.32 q2[2], r0
|
||||
; CHECK-BE-NEXT: vmov.u8 r0, q1[3]
|
||||
; CHECK-BE-NEXT: vmov.32 q2[3], r0
|
||||
; CHECK-BE-NEXT: vrev64.32 q1, q0
|
||||
; CHECK-BE-NEXT: vmov q2[3], q2[1], r1, r0
|
||||
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
|
||||
; CHECK-BE-NEXT: vcmp.i32 ne, q2, zr
|
||||
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
|
||||
; CHECK-BE-NEXT: vpsel q1, q1, q0
|
||||
; CHECK-BE-NEXT: vrev64.32 q0, q1
|
||||
; CHECK-BE-NEXT: add sp, #4
|
||||
|
@ -175,11 +179,13 @@ define arm_aapcs_vfpcc <2 x i64> @bitcast_to_v2i1(i2 %b, <2 x i64> %a) {
|
|||
; CHECK-LE-NEXT: sub sp, #4
|
||||
; CHECK-LE-NEXT: and r1, r0, #2
|
||||
; CHECK-LE-NEXT: and r0, r0, #1
|
||||
; CHECK-LE-NEXT: movs r2, #0
|
||||
; CHECK-LE-NEXT: rsbs r0, r0, #0
|
||||
; CHECK-LE-NEXT: movs r2, #0
|
||||
; CHECK-LE-NEXT: vmov.32 q1[0], r0
|
||||
; CHECK-LE-NEXT: sub.w r1, r2, r1, lsr #1
|
||||
; CHECK-LE-NEXT: vmov q1[2], q1[0], r1, r0
|
||||
; CHECK-LE-NEXT: vmov q1[3], q1[1], r1, r0
|
||||
; CHECK-LE-NEXT: vmov.32 q1[1], r0
|
||||
; CHECK-LE-NEXT: vmov.32 q1[2], r1
|
||||
; CHECK-LE-NEXT: vmov.32 q1[3], r1
|
||||
; CHECK-LE-NEXT: vand q0, q0, q1
|
||||
; CHECK-LE-NEXT: add sp, #4
|
||||
; CHECK-LE-NEXT: bx lr
|
||||
|
@ -189,12 +195,14 @@ define arm_aapcs_vfpcc <2 x i64> @bitcast_to_v2i1(i2 %b, <2 x i64> %a) {
|
|||
; CHECK-BE-NEXT: .pad #4
|
||||
; CHECK-BE-NEXT: sub sp, #4
|
||||
; CHECK-BE-NEXT: and r1, r0, #2
|
||||
; CHECK-BE-NEXT: and r0, r0, #1
|
||||
; CHECK-BE-NEXT: movs r2, #0
|
||||
; CHECK-BE-NEXT: rsbs r0, r0, #0
|
||||
; CHECK-BE-NEXT: and r0, r0, #1
|
||||
; CHECK-BE-NEXT: sub.w r1, r2, r1, lsr #1
|
||||
; CHECK-BE-NEXT: vmov q1[2], q1[0], r0, r1
|
||||
; CHECK-BE-NEXT: vmov q1[3], q1[1], r0, r1
|
||||
; CHECK-BE-NEXT: rsbs r0, r0, #0
|
||||
; CHECK-BE-NEXT: vmov.32 q1[0], r1
|
||||
; CHECK-BE-NEXT: vmov.32 q1[1], r1
|
||||
; CHECK-BE-NEXT: vmov.32 q1[2], r0
|
||||
; CHECK-BE-NEXT: vmov.32 q1[3], r0
|
||||
; CHECK-BE-NEXT: vrev64.32 q2, q1
|
||||
; CHECK-BE-NEXT: vand q0, q0, q2
|
||||
; CHECK-BE-NEXT: add sp, #4
|
||||
|
|
|
@ -46,12 +46,12 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @sext_v2i1_v2i64(<2 x i64> %src) {
|
||||
; CHECK-LABEL: sext_v2i1_v2i64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: movs r2, #0
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: sbcs.w r0, r2, r0
|
||||
; CHECK-NEXT: mov.w r0, #0
|
||||
; CHECK-NEXT: it lt
|
||||
|
@ -64,8 +64,10 @@ define arm_aapcs_vfpcc <2 x i64> @sext_v2i1_v2i64(<2 x i64> %src) {
|
|||
; CHECK-NEXT: movlt r2, #1
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q0[0], r1
|
||||
; CHECK-NEXT: vmov.32 q0[1], r1
|
||||
; CHECK-NEXT: vmov.32 q0[2], r0
|
||||
; CHECK-NEXT: vmov.32 q0[3], r0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%c = icmp sgt <2 x i64> %src, zeroinitializer
|
||||
|
@ -119,14 +121,14 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @zext_v2i1_v2i64(<2 x i64> %src) {
|
||||
; CHECK-LABEL: zext_v2i1_v2i64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: adr r1, .LCPI7_0
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r1]
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: movs r0, #0
|
||||
; CHECK-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: sbcs.w r1, r0, r1
|
||||
; CHECK-NEXT: mov.w r1, #0
|
||||
; CHECK-NEXT: it lt
|
||||
|
@ -139,7 +141,8 @@ define arm_aapcs_vfpcc <2 x i64> @zext_v2i1_v2i64(<2 x i64> %src) {
|
|||
; CHECK-NEXT: movlt r0, #1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
|
||||
; CHECK-NEXT: vmov.32 q0[0], r0
|
||||
; CHECK-NEXT: vmov.32 q0[2], r1
|
||||
; CHECK-NEXT: vand q0, q0, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: .p2align 4
|
||||
|
@ -198,14 +201,16 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @trunc_v2i1_v2i64(<2 x i64> %src) {
|
||||
; CHECK-LABEL: trunc_v2i1_v2i64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: and r0, r0, #1
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: and r1, r1, #1
|
||||
; CHECK-NEXT: rsbs r0, r0, #0
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
|
||||
; CHECK-NEXT: and r0, r0, #1
|
||||
; CHECK-NEXT: vmov.32 q1[0], r1
|
||||
; CHECK-NEXT: rsbs r0, r0, #0
|
||||
; CHECK-NEXT: vmov.32 q1[1], r1
|
||||
; CHECK-NEXT: vmov.32 q1[2], r0
|
||||
; CHECK-NEXT: vmov.32 q1[3], r0
|
||||
; CHECK-NEXT: vand q0, q0, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
|
|
|
@ -11,12 +11,14 @@ define arm_aapcs_vfpcc <4 x i32> @load_v4i1(<4 x i1> *%src, <4 x i32> %a) {
|
|||
; CHECK-LE-NEXT: vmsr p0, r0
|
||||
; CHECK-LE-NEXT: vpsel q1, q2, q1
|
||||
; CHECK-LE-NEXT: vmov.u8 r0, q1[0]
|
||||
; CHECK-LE-NEXT: vmov.u8 r1, q1[2]
|
||||
; CHECK-LE-NEXT: vmov q2[2], q2[0], r1, r0
|
||||
; CHECK-LE-NEXT: vmov.32 q2[0], r0
|
||||
; CHECK-LE-NEXT: vmov.u8 r0, q1[1]
|
||||
; CHECK-LE-NEXT: vmov.u8 r1, q1[3]
|
||||
; CHECK-LE-NEXT: vmov.32 q2[1], r0
|
||||
; CHECK-LE-NEXT: vmov.u8 r0, q1[2]
|
||||
; CHECK-LE-NEXT: vmov.32 q2[2], r0
|
||||
; CHECK-LE-NEXT: vmov.u8 r0, q1[3]
|
||||
; CHECK-LE-NEXT: vmov.32 q2[3], r0
|
||||
; CHECK-LE-NEXT: vmov.i32 q1, #0x0
|
||||
; CHECK-LE-NEXT: vmov q2[3], q2[1], r1, r0
|
||||
; CHECK-LE-NEXT: vcmp.i32 ne, q2, zr
|
||||
; CHECK-LE-NEXT: vpsel q0, q0, q1
|
||||
; CHECK-LE-NEXT: bx lr
|
||||
|
@ -29,14 +31,16 @@ define arm_aapcs_vfpcc <4 x i32> @load_v4i1(<4 x i1> *%src, <4 x i32> %a) {
|
|||
; CHECK-BE-NEXT: vmsr p0, r0
|
||||
; CHECK-BE-NEXT: vpsel q1, q2, q1
|
||||
; CHECK-BE-NEXT: vmov.u8 r0, q1[0]
|
||||
; CHECK-BE-NEXT: vmov.u8 r1, q1[2]
|
||||
; CHECK-BE-NEXT: vmov q2[2], q2[0], r1, r0
|
||||
; CHECK-BE-NEXT: vmov.32 q2[0], r0
|
||||
; CHECK-BE-NEXT: vmov.u8 r0, q1[1]
|
||||
; CHECK-BE-NEXT: vmov.u8 r1, q1[3]
|
||||
; CHECK-BE-NEXT: vmov.32 q2[1], r0
|
||||
; CHECK-BE-NEXT: vmov.u8 r0, q1[2]
|
||||
; CHECK-BE-NEXT: vmov.32 q2[2], r0
|
||||
; CHECK-BE-NEXT: vmov.u8 r0, q1[3]
|
||||
; CHECK-BE-NEXT: vmov.32 q2[3], r0
|
||||
; CHECK-BE-NEXT: vrev64.32 q1, q0
|
||||
; CHECK-BE-NEXT: vmov q2[3], q2[1], r1, r0
|
||||
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
|
||||
; CHECK-BE-NEXT: vcmp.i32 ne, q2, zr
|
||||
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
|
||||
; CHECK-BE-NEXT: vpsel q1, q1, q0
|
||||
; CHECK-BE-NEXT: vrev64.32 q0, q1
|
||||
; CHECK-BE-NEXT: bx lr
|
||||
|
@ -141,11 +145,13 @@ define arm_aapcs_vfpcc <2 x i64> @load_v2i1(<2 x i1> *%src, <2 x i64> %a) {
|
|||
; CHECK-LE: @ %bb.0: @ %entry
|
||||
; CHECK-LE-NEXT: ldrb r0, [r0]
|
||||
; CHECK-LE-NEXT: and r1, r0, #1
|
||||
; CHECK-LE-NEXT: ubfx r0, r0, #1, #1
|
||||
; CHECK-LE-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-LE-NEXT: ubfx r0, r0, #1, #1
|
||||
; CHECK-LE-NEXT: vmov.32 q1[0], r1
|
||||
; CHECK-LE-NEXT: rsbs r0, r0, #0
|
||||
; CHECK-LE-NEXT: vmov q1[2], q1[0], r0, r1
|
||||
; CHECK-LE-NEXT: vmov q1[3], q1[1], r0, r1
|
||||
; CHECK-LE-NEXT: vmov.32 q1[1], r1
|
||||
; CHECK-LE-NEXT: vmov.32 q1[2], r0
|
||||
; CHECK-LE-NEXT: vmov.32 q1[3], r0
|
||||
; CHECK-LE-NEXT: vand q0, q0, q1
|
||||
; CHECK-LE-NEXT: bx lr
|
||||
;
|
||||
|
@ -156,8 +162,10 @@ define arm_aapcs_vfpcc <2 x i64> @load_v2i1(<2 x i1> *%src, <2 x i64> %a) {
|
|||
; CHECK-BE-NEXT: and r0, r0, #1
|
||||
; CHECK-BE-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-BE-NEXT: rsbs r0, r0, #0
|
||||
; CHECK-BE-NEXT: vmov q1[2], q1[0], r0, r1
|
||||
; CHECK-BE-NEXT: vmov q1[3], q1[1], r0, r1
|
||||
; CHECK-BE-NEXT: vmov.32 q1[0], r1
|
||||
; CHECK-BE-NEXT: vmov.32 q1[1], r1
|
||||
; CHECK-BE-NEXT: vmov.32 q1[2], r0
|
||||
; CHECK-BE-NEXT: vmov.32 q1[3], r0
|
||||
; CHECK-BE-NEXT: vrev64.32 q2, q1
|
||||
; CHECK-BE-NEXT: vand q0, q0, q2
|
||||
; CHECK-BE-NEXT: bx lr
|
||||
|
|
|
@ -325,18 +325,20 @@ define arm_aapcs_vfpcc <2 x i64> @cmpeqz_v2i1(<2 x i64> %a, <2 x i64> %b) {
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: cset r1, eq
|
||||
; CHECK-NEXT: tst.w r1, #1
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q2[0], r0
|
||||
; CHECK-NEXT: vmov.32 q2[1], r0
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q2[2], r0
|
||||
; CHECK-NEXT: vmov.32 q2[3], r0
|
||||
; CHECK-NEXT: vbic q0, q0, q2
|
||||
; CHECK-NEXT: vand q1, q1, q2
|
||||
; CHECK-NEXT: vorr q0, q1, q0
|
||||
|
@ -353,18 +355,20 @@ define arm_aapcs_vfpcc <2 x i64> @cmpeq_v2i1(<2 x i64> %a, <2 x i64> %b, <2 x i6
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: cset r1, eq
|
||||
; CHECK-NEXT: tst.w r1, #1
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q2[0], r0
|
||||
; CHECK-NEXT: vmov.32 q2[1], r0
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q2[2], r0
|
||||
; CHECK-NEXT: vmov.32 q2[3], r0
|
||||
; CHECK-NEXT: vbic q0, q0, q2
|
||||
; CHECK-NEXT: vand q1, q1, q2
|
||||
; CHECK-NEXT: vorr q0, q1, q0
|
||||
|
|
|
@ -379,32 +379,36 @@ define arm_aapcs_vfpcc <2 x i64> @cmpeqz_v2i1(<2 x i64> %a, <2 x i64> %b) {
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov r1, s4
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s7
|
||||
; CHECK-NEXT: vmov r1, s6
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: cset r1, eq
|
||||
; CHECK-NEXT: tst.w r1, #1
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: vmov.32 q2[0], r0
|
||||
; CHECK-NEXT: vmov.32 q2[1], r0
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: cset r1, eq
|
||||
; CHECK-NEXT: tst.w r1, #1
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r1, r0
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q2[2], r0
|
||||
; CHECK-NEXT: vmov.32 q2[3], r0
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q3[0], r0
|
||||
; CHECK-NEXT: vmov.32 q3[1], r0
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q3[2], r0
|
||||
; CHECK-NEXT: vmov.32 q3[3], r0
|
||||
; CHECK-NEXT: vorr q2, q3, q2
|
||||
; CHECK-NEXT: vbic q1, q1, q2
|
||||
; CHECK-NEXT: vand q0, q0, q2
|
||||
|
@ -424,40 +428,44 @@ define arm_aapcs_vfpcc <2 x i64> @cmpeq_v2i1(<2 x i64> %a, <2 x i64> %b, <2 x i6
|
|||
; CHECK-NEXT: vmov r0, s9
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: vmov r2, s4
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: eors r0, r1
|
||||
; CHECK-NEXT: vmov r1, s8
|
||||
; CHECK-NEXT: eors r1, r2
|
||||
; CHECK-NEXT: vmov r2, s7
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s11
|
||||
; CHECK-NEXT: vmov r1, s7
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q3[0], r0
|
||||
; CHECK-NEXT: vmov.32 q3[1], r0
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: eors r0, r1
|
||||
; CHECK-NEXT: vmov r1, s10
|
||||
; CHECK-NEXT: eors r1, r2
|
||||
; CHECK-NEXT: vmov r2, s10
|
||||
; CHECK-NEXT: eors r2, r3
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: cset r1, eq
|
||||
; CHECK-NEXT: tst.w r1, #1
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: cset r1, eq
|
||||
; CHECK-NEXT: tst.w r1, #1
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r1, r0
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r1, r0
|
||||
; CHECK-NEXT: vorr q2, q3, q2
|
||||
; CHECK-NEXT: vmov.32 q3[2], r0
|
||||
; CHECK-NEXT: vmov.32 q3[3], r0
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q2[0], r0
|
||||
; CHECK-NEXT: vmov.32 q2[1], r0
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q2[2], r0
|
||||
; CHECK-NEXT: vmov.32 q2[3], r0
|
||||
; CHECK-NEXT: vorr q2, q2, q3
|
||||
; CHECK-NEXT: vbic q1, q1, q2
|
||||
; CHECK-NEXT: vand q0, q0, q2
|
||||
; CHECK-NEXT: vorr q0, q0, q1
|
||||
|
|
|
@ -316,11 +316,13 @@ define <4 x i32> @shuffle5_b_v4i32(<8 x i16> %src, <4 x i32> %a, <4 x i32> %b) {
|
|||
; CHECK-NEXT: vmov.i8 q0, #0x0
|
||||
; CHECK-NEXT: vpsel q0, q1, q0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[0]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[2]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q1[0], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[1]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[3]
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q1[1], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[2]
|
||||
; CHECK-NEXT: vmov.32 q1[2], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[3]
|
||||
; CHECK-NEXT: vmov.32 q1[3], r0
|
||||
; CHECK-NEXT: add r0, sp, #16
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: mov r0, sp
|
||||
|
@ -347,11 +349,13 @@ define <4 x i32> @shuffle5_t_v4i32(<8 x i16> %src, <4 x i32> %a, <4 x i32> %b) {
|
|||
; CHECK-NEXT: vmov.i8 q0, #0x0
|
||||
; CHECK-NEXT: vpsel q0, q1, q0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[4]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[6]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q1[0], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[5]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[7]
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q1[1], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[6]
|
||||
; CHECK-NEXT: vmov.32 q1[2], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[7]
|
||||
; CHECK-NEXT: vmov.32 q1[3], r0
|
||||
; CHECK-NEXT: add r0, sp, #16
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: mov r0, sp
|
||||
|
|
|
@ -459,32 +459,36 @@ define arm_aapcs_vfpcc <2 x i64> @cmpeqz_v2i1(<2 x i64> %a, <2 x i64> %b) {
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov r1, s4
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s7
|
||||
; CHECK-NEXT: vmov r1, s6
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: cset r1, eq
|
||||
; CHECK-NEXT: tst.w r1, #1
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: vmov.32 q2[0], r0
|
||||
; CHECK-NEXT: vmov.32 q2[1], r0
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: cset r1, eq
|
||||
; CHECK-NEXT: tst.w r1, #1
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r1, r0
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q2[2], r0
|
||||
; CHECK-NEXT: vmov.32 q2[3], r0
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q3[0], r0
|
||||
; CHECK-NEXT: vmov.32 q3[1], r0
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q3[2], r0
|
||||
; CHECK-NEXT: vmov.32 q3[3], r0
|
||||
; CHECK-NEXT: veor q2, q3, q2
|
||||
; CHECK-NEXT: vbic q1, q1, q2
|
||||
; CHECK-NEXT: vand q0, q0, q2
|
||||
|
@ -504,40 +508,44 @@ define arm_aapcs_vfpcc <2 x i64> @cmpeq_v2i1(<2 x i64> %a, <2 x i64> %b, <2 x i6
|
|||
; CHECK-NEXT: vmov r0, s9
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: vmov r2, s4
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: eors r0, r1
|
||||
; CHECK-NEXT: vmov r1, s8
|
||||
; CHECK-NEXT: eors r1, r2
|
||||
; CHECK-NEXT: vmov r2, s7
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s11
|
||||
; CHECK-NEXT: vmov r1, s7
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q3[0], r0
|
||||
; CHECK-NEXT: vmov.32 q3[1], r0
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: eors r0, r1
|
||||
; CHECK-NEXT: vmov r1, s10
|
||||
; CHECK-NEXT: eors r1, r2
|
||||
; CHECK-NEXT: vmov r2, s10
|
||||
; CHECK-NEXT: eors r2, r3
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: cset r1, eq
|
||||
; CHECK-NEXT: tst.w r1, #1
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: cset r1, eq
|
||||
; CHECK-NEXT: tst.w r1, #1
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r1, r0
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r1, r0
|
||||
; CHECK-NEXT: veor q2, q3, q2
|
||||
; CHECK-NEXT: vmov.32 q3[2], r0
|
||||
; CHECK-NEXT: vmov.32 q3[3], r0
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q2[0], r0
|
||||
; CHECK-NEXT: vmov.32 q2[1], r0
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q2[2], r0
|
||||
; CHECK-NEXT: vmov.32 q2[3], r0
|
||||
; CHECK-NEXT: veor q2, q2, q3
|
||||
; CHECK-NEXT: vbic q1, q1, q2
|
||||
; CHECK-NEXT: vand q0, q0, q2
|
||||
; CHECK-NEXT: vorr q0, q0, q1
|
||||
|
|
|
@ -20,7 +20,7 @@ define arm_aapcs_vfpcc void @ssatmul_s_q31(i32* nocapture readonly %pSrcA, i32*
|
|||
; CHECK-NEXT: @ %bb.2:
|
||||
; CHECK-NEXT: movs r7, #0
|
||||
; CHECK-NEXT: mov r12, r0
|
||||
; CHECK-NEXT: mov r10, r1
|
||||
; CHECK-NEXT: mov r6, r1
|
||||
; CHECK-NEXT: mov r11, r2
|
||||
; CHECK-NEXT: b .LBB0_6
|
||||
; CHECK-NEXT: .LBB0_3: @ %vector.ph
|
||||
|
@ -32,65 +32,69 @@ define arm_aapcs_vfpcc void @ssatmul_s_q31(i32* nocapture readonly %pSrcA, i32*
|
|||
; CHECK-NEXT: str r3, [sp] @ 4-byte Spill
|
||||
; CHECK-NEXT: add.w lr, r6, r7, lsr #1
|
||||
; CHECK-NEXT: add.w r11, r2, r3, lsl #2
|
||||
; CHECK-NEXT: add.w r10, r1, r3, lsl #2
|
||||
; CHECK-NEXT: add.w r6, r1, r3, lsl #2
|
||||
; CHECK-NEXT: add.w r12, r0, r3, lsl #2
|
||||
; CHECK-NEXT: dls lr, lr
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r4]
|
||||
; CHECK-NEXT: mvn r10, #-2147483648
|
||||
; CHECK-NEXT: vmvn.i32 q1, #0x80000000
|
||||
; CHECK-NEXT: .LBB0_4: @ %vector.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: ldrd r4, r5, [r0]
|
||||
; CHECK-NEXT: mov.w r3, #-1
|
||||
; CHECK-NEXT: ldrd r7, r8, [r1]
|
||||
; CHECK-NEXT: ldrd r4, r8, [r0]
|
||||
; CHECK-NEXT: adds r0, #8
|
||||
; CHECK-NEXT: smull r6, r5, r8, r5
|
||||
; CHECK-NEXT: ldrd r7, r5, [r1]
|
||||
; CHECK-NEXT: adds r1, #8
|
||||
; CHECK-NEXT: smull r8, r5, r5, r8
|
||||
; CHECK-NEXT: smull r4, r7, r7, r4
|
||||
; CHECK-NEXT: asrl r6, r5, #31
|
||||
; CHECK-NEXT: asrl r8, r5, #31
|
||||
; CHECK-NEXT: asrl r4, r7, #31
|
||||
; CHECK-NEXT: rsbs.w r9, r4, #-2147483648
|
||||
; CHECK-NEXT: vmov q4[2], q4[0], r6, r4
|
||||
; CHECK-NEXT: sbcs r3, r7
|
||||
; CHECK-NEXT: vmov q4[3], q4[1], r5, r7
|
||||
; CHECK-NEXT: vmov.32 q4[0], r4
|
||||
; CHECK-NEXT: mov.w r9, #-1
|
||||
; CHECK-NEXT: sbcs.w r3, r9, r7
|
||||
; CHECK-NEXT: mov.w r3, #0
|
||||
; CHECK-NEXT: vmov.32 q4[1], r7
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r3, #1
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: csetm r9, ne
|
||||
; CHECK-NEXT: rsbs.w r3, r6, #-2147483648
|
||||
; CHECK-NEXT: mov.w r3, #-1
|
||||
; CHECK-NEXT: mvn r6, #-2147483648
|
||||
; CHECK-NEXT: sbcs r3, r5
|
||||
; CHECK-NEXT: csetm r3, ne
|
||||
; CHECK-NEXT: vmov.32 q4[2], r8
|
||||
; CHECK-NEXT: vmov.32 q2[0], r3
|
||||
; CHECK-NEXT: vmov.32 q4[3], r5
|
||||
; CHECK-NEXT: vmov.32 q2[1], r3
|
||||
; CHECK-NEXT: rsbs.w r3, r8, #-2147483648
|
||||
; CHECK-NEXT: sbcs.w r3, r9, r5
|
||||
; CHECK-NEXT: mov.w r3, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r3, #1
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: csetm r3, ne
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r3, r9
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r3, r9
|
||||
; CHECK-NEXT: vmov.32 q2[2], r3
|
||||
; CHECK-NEXT: vmov.32 q2[3], r3
|
||||
; CHECK-NEXT: vbic q3, q0, q2
|
||||
; CHECK-NEXT: vand q2, q4, q2
|
||||
; CHECK-NEXT: vorr q2, q2, q3
|
||||
; CHECK-NEXT: vmov r4, s8
|
||||
; CHECK-NEXT: vmov r3, s9
|
||||
; CHECK-NEXT: vmov r5, s10
|
||||
; CHECK-NEXT: subs r4, r4, r6
|
||||
; CHECK-NEXT: vmov r4, s11
|
||||
; CHECK-NEXT: subs.w r4, r4, r10
|
||||
; CHECK-NEXT: sbcs r3, r3, #0
|
||||
; CHECK-NEXT: vmov r4, s10
|
||||
; CHECK-NEXT: mov.w r3, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r3, #1
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: csetm r3, ne
|
||||
; CHECK-NEXT: vmov.32 q3[0], r3
|
||||
; CHECK-NEXT: vmov.32 q3[1], r3
|
||||
; CHECK-NEXT: vmov r3, s11
|
||||
; CHECK-NEXT: subs.w r4, r4, r10
|
||||
; CHECK-NEXT: sbcs r3, r3, #0
|
||||
; CHECK-NEXT: mov.w r3, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r3, #1
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: csetm r3, ne
|
||||
; CHECK-NEXT: subs r5, r5, r6
|
||||
; CHECK-NEXT: vmov.32 q3[1], r3
|
||||
; CHECK-NEXT: sbcs r4, r4, #0
|
||||
; CHECK-NEXT: mov.w r4, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r4, #1
|
||||
; CHECK-NEXT: cmp r4, #0
|
||||
; CHECK-NEXT: csetm r4, ne
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r4, r3
|
||||
; CHECK-NEXT: vmov.32 q3[2], r3
|
||||
; CHECK-NEXT: vbic q4, q1, q3
|
||||
; CHECK-NEXT: vand q2, q2, q3
|
||||
; CHECK-NEXT: vorr q2, q2, q4
|
||||
|
@ -112,7 +116,7 @@ define arm_aapcs_vfpcc void @ssatmul_s_q31(i32* nocapture readonly %pSrcA, i32*
|
|||
; CHECK-NEXT: .LBB0_7: @ %for.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: ldr r3, [r12], #4
|
||||
; CHECK-NEXT: ldr r4, [r10], #4
|
||||
; CHECK-NEXT: ldr r4, [r6], #4
|
||||
; CHECK-NEXT: smull r4, r3, r4, r3
|
||||
; CHECK-NEXT: asrl r4, r3, #31
|
||||
; CHECK-NEXT: subs r5, r1, r4
|
||||
|
@ -225,141 +229,149 @@ define arm_aapcs_vfpcc void @ssatmul_4_q31(i32* nocapture readonly %pSrcA, i32*
|
|||
; CHECK-NEXT: sub sp, #4
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
|
||||
; CHECK-NEXT: .pad #16
|
||||
; CHECK-NEXT: sub sp, #16
|
||||
; CHECK-NEXT: .pad #8
|
||||
; CHECK-NEXT: sub sp, #8
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: beq.w .LBB1_8
|
||||
; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
|
||||
; CHECK-NEXT: mov r9, r1
|
||||
; CHECK-NEXT: movs r7, #0
|
||||
; CHECK-NEXT: cmp r3, #3
|
||||
; CHECK-NEXT: bhi .LBB1_3
|
||||
; CHECK-NEXT: @ %bb.2:
|
||||
; CHECK-NEXT: mov r12, r0
|
||||
; CHECK-NEXT: mov r9, r1
|
||||
; CHECK-NEXT: mov r1, r9
|
||||
; CHECK-NEXT: mov r11, r2
|
||||
; CHECK-NEXT: b .LBB1_6
|
||||
; CHECK-NEXT: .LBB1_3: @ %vector.ph
|
||||
; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill
|
||||
; CHECK-NEXT: bic r3, r3, #3
|
||||
; CHECK-NEXT: subs r7, r3, #4
|
||||
; CHECK-NEXT: bic r7, r3, #3
|
||||
; CHECK-NEXT: adr r4, .LCPI1_0
|
||||
; CHECK-NEXT: movs r6, #1
|
||||
; CHECK-NEXT: subs r1, r7, #4
|
||||
; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill
|
||||
; CHECK-NEXT: movs r3, #1
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r4]
|
||||
; CHECK-NEXT: add.w lr, r3, r1, lsr #2
|
||||
; CHECK-NEXT: str r7, [sp] @ 4-byte Spill
|
||||
; CHECK-NEXT: adr r4, .LCPI1_1
|
||||
; CHECK-NEXT: add.w lr, r6, r7, lsr #2
|
||||
; CHECK-NEXT: str r3, [sp] @ 4-byte Spill
|
||||
; CHECK-NEXT: add.w r11, r2, r3, lsl #2
|
||||
; CHECK-NEXT: add.w r9, r1, r3, lsl #2
|
||||
; CHECK-NEXT: add.w r12, r0, r3, lsl #2
|
||||
; CHECK-NEXT: add.w r11, r2, r7, lsl #2
|
||||
; CHECK-NEXT: add.w r1, r9, r7, lsl #2
|
||||
; CHECK-NEXT: add.w r12, r0, r7, lsl #2
|
||||
; CHECK-NEXT: dls lr, lr
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r4]
|
||||
; CHECK-NEXT: mov.w r10, #-1
|
||||
; CHECK-NEXT: .LBB1_4: @ %vector.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vldrw.u32 q3, [r1], #16
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0], #16
|
||||
; CHECK-NEXT: str r2, [sp, #12] @ 4-byte Spill
|
||||
; CHECK-NEXT: mov.w r2, #-1
|
||||
; CHECK-NEXT: vldrw.u32 q3, [r9], #16
|
||||
; CHECK-NEXT: vmov.f32 s16, s10
|
||||
; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill
|
||||
; CHECK-NEXT: vmov.f32 s20, s14
|
||||
; CHECK-NEXT: vmov.f32 s18, s11
|
||||
; CHECK-NEXT: vmov.f32 s22, s15
|
||||
; CHECK-NEXT: vmullb.s32 q6, q5, q4
|
||||
; CHECK-NEXT: vmov.f32 s14, s13
|
||||
; CHECK-NEXT: vmov.f32 s10, s9
|
||||
; CHECK-NEXT: vmov r7, s25
|
||||
; CHECK-NEXT: vmov r4, s24
|
||||
; CHECK-NEXT: asrl r4, r7, #31
|
||||
; CHECK-NEXT: vmov r10, s26
|
||||
; CHECK-NEXT: vmov r8, s26
|
||||
; CHECK-NEXT: rsbs.w r5, r4, #-2147483648
|
||||
; CHECK-NEXT: vmov.f32 s10, s9
|
||||
; CHECK-NEXT: sbcs.w r5, r2, r7
|
||||
; CHECK-NEXT: vmov.f32 s14, s13
|
||||
; CHECK-NEXT: sbcs.w r5, r10, r7
|
||||
; CHECK-NEXT: mov.w r5, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r5, #1
|
||||
; CHECK-NEXT: cmp r5, #0
|
||||
; CHECK-NEXT: csetm r5, ne
|
||||
; CHECK-NEXT: vmov.32 q4[0], r5
|
||||
; CHECK-NEXT: vmov.32 q4[1], r5
|
||||
; CHECK-NEXT: vmov r5, s27
|
||||
; CHECK-NEXT: csetm r8, ne
|
||||
; CHECK-NEXT: asrl r10, r5, #31
|
||||
; CHECK-NEXT: vmov r6, s14
|
||||
; CHECK-NEXT: rsbs.w r3, r10, #-2147483648
|
||||
; CHECK-NEXT: vmov q6[2], q6[0], r10, r4
|
||||
; CHECK-NEXT: sbcs.w r3, r2, r5
|
||||
; CHECK-NEXT: vmov q6[3], q6[1], r5, r7
|
||||
; CHECK-NEXT: mov.w r3, #0
|
||||
; CHECK-NEXT: asrl r8, r5, #31
|
||||
; CHECK-NEXT: vmov.32 q6[0], r4
|
||||
; CHECK-NEXT: rsbs.w r6, r8, #-2147483648
|
||||
; CHECK-NEXT: vmov.32 q6[1], r7
|
||||
; CHECK-NEXT: sbcs.w r6, r10, r5
|
||||
; CHECK-NEXT: vmov.32 q6[2], r8
|
||||
; CHECK-NEXT: mov.w r6, #0
|
||||
; CHECK-NEXT: vmov.32 q6[3], r5
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r3, #1
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: csetm r3, ne
|
||||
; CHECK-NEXT: vmov q4[2], q4[0], r3, r8
|
||||
; CHECK-NEXT: vmov q4[3], q4[1], r3, r8
|
||||
; CHECK-NEXT: movlt r6, #1
|
||||
; CHECK-NEXT: cmp r6, #0
|
||||
; CHECK-NEXT: csetm r6, ne
|
||||
; CHECK-NEXT: mvn r8, #-2147483648
|
||||
; CHECK-NEXT: vmov.32 q4[2], r6
|
||||
; CHECK-NEXT: vmov.32 q4[3], r6
|
||||
; CHECK-NEXT: vmov r6, s14
|
||||
; CHECK-NEXT: vbic q5, q0, q4
|
||||
; CHECK-NEXT: vand q4, q6, q4
|
||||
; CHECK-NEXT: vorr q4, q4, q5
|
||||
; CHECK-NEXT: vmov r4, s16
|
||||
; CHECK-NEXT: vmov r3, s17
|
||||
; CHECK-NEXT: vmov r5, s18
|
||||
; CHECK-NEXT: subs.w r4, r4, r8
|
||||
; CHECK-NEXT: vmov r4, s19
|
||||
; CHECK-NEXT: sbcs r3, r3, #0
|
||||
; CHECK-NEXT: mov.w r3, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r3, #1
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: csetm r3, ne
|
||||
; CHECK-NEXT: vmov r5, s16
|
||||
; CHECK-NEXT: vmov r4, s17
|
||||
; CHECK-NEXT: subs.w r5, r5, r8
|
||||
; CHECK-NEXT: vmov.32 q5[1], r3
|
||||
; CHECK-NEXT: vmov r5, s10
|
||||
; CHECK-NEXT: sbcs r4, r4, #0
|
||||
; CHECK-NEXT: vmov r5, s18
|
||||
; CHECK-NEXT: mov.w r4, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r4, #1
|
||||
; CHECK-NEXT: cmp r4, #0
|
||||
; CHECK-NEXT: csetm r4, ne
|
||||
; CHECK-NEXT: vmov.32 q5[0], r4
|
||||
; CHECK-NEXT: vmov.32 q5[1], r4
|
||||
; CHECK-NEXT: vmov r4, s19
|
||||
; CHECK-NEXT: subs.w r5, r5, r8
|
||||
; CHECK-NEXT: vmov r5, s12
|
||||
; CHECK-NEXT: sbcs r4, r4, #0
|
||||
; CHECK-NEXT: mov.w r4, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r4, #1
|
||||
; CHECK-NEXT: cmp r4, #0
|
||||
; CHECK-NEXT: csetm r4, ne
|
||||
; CHECK-NEXT: vmov q5[2], q5[0], r4, r3
|
||||
; CHECK-NEXT: vmov r3, s8
|
||||
; CHECK-NEXT: vmov r4, s12
|
||||
; CHECK-NEXT: smull r6, r5, r6, r5
|
||||
; CHECK-NEXT: vmov.32 q5[2], r4
|
||||
; CHECK-NEXT: vmov r4, s8
|
||||
; CHECK-NEXT: vbic q6, q1, q5
|
||||
; CHECK-NEXT: vand q4, q4, q5
|
||||
; CHECK-NEXT: vorr q4, q4, q6
|
||||
; CHECK-NEXT: asrl r6, r5, #31
|
||||
; CHECK-NEXT: smull r4, r7, r4, r3
|
||||
; CHECK-NEXT: smull r4, r7, r5, r4
|
||||
; CHECK-NEXT: asrl r4, r7, #31
|
||||
; CHECK-NEXT: rsbs.w r3, r4, #-2147483648
|
||||
; CHECK-NEXT: vmov q5[2], q5[0], r6, r4
|
||||
; CHECK-NEXT: sbcs.w r3, r2, r7
|
||||
; CHECK-NEXT: vmov q5[3], q5[1], r5, r7
|
||||
; CHECK-NEXT: rsbs.w r5, r4, #-2147483648
|
||||
; CHECK-NEXT: vmov.32 q3[0], r4
|
||||
; CHECK-NEXT: sbcs.w r5, r10, r7
|
||||
; CHECK-NEXT: vmov.32 q3[1], r7
|
||||
; CHECK-NEXT: mov.w r5, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r5, #1
|
||||
; CHECK-NEXT: cmp r5, #0
|
||||
; CHECK-NEXT: csetm r5, ne
|
||||
; CHECK-NEXT: vmov.32 q5[0], r5
|
||||
; CHECK-NEXT: vmov.32 q5[1], r5
|
||||
; CHECK-NEXT: vmov r5, s10
|
||||
; CHECK-NEXT: smull r6, r5, r6, r5
|
||||
; CHECK-NEXT: asrl r6, r5, #31
|
||||
; CHECK-NEXT: rsbs.w r3, r6, #-2147483648
|
||||
; CHECK-NEXT: vmov.32 q3[2], r6
|
||||
; CHECK-NEXT: sbcs.w r3, r10, r5
|
||||
; CHECK-NEXT: vmov.32 q3[3], r5
|
||||
; CHECK-NEXT: mov.w r3, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r3, #1
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: csetm r3, ne
|
||||
; CHECK-NEXT: rsbs.w r1, r6, #-2147483648
|
||||
; CHECK-NEXT: sbcs.w r1, r2, r5
|
||||
; CHECK-NEXT: mov.w r1, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r1, #1
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r1, r3
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r1, r3
|
||||
; CHECK-NEXT: ldrd r1, r2, [sp, #8] @ 8-byte Folded Reload
|
||||
; CHECK-NEXT: vbic q3, q0, q2
|
||||
; CHECK-NEXT: vand q2, q5, q2
|
||||
; CHECK-NEXT: vorr q2, q2, q3
|
||||
; CHECK-NEXT: vmov r3, s8
|
||||
; CHECK-NEXT: vmov r4, s9
|
||||
; CHECK-NEXT: subs.w r3, r3, r8
|
||||
; CHECK-NEXT: sbcs r3, r4, #0
|
||||
; CHECK-NEXT: vmov.32 q5[2], r3
|
||||
; CHECK-NEXT: vmov.32 q5[3], r3
|
||||
; CHECK-NEXT: vbic q2, q0, q5
|
||||
; CHECK-NEXT: vand q3, q3, q5
|
||||
; CHECK-NEXT: vorr q2, q3, q2
|
||||
; CHECK-NEXT: vmov r4, s8
|
||||
; CHECK-NEXT: vmov r3, s9
|
||||
; CHECK-NEXT: subs.w r4, r4, r8
|
||||
; CHECK-NEXT: sbcs r3, r3, #0
|
||||
; CHECK-NEXT: vmov r4, s10
|
||||
; CHECK-NEXT: mov.w r3, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r3, #1
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: csetm r3, ne
|
||||
; CHECK-NEXT: vmov.32 q3[0], r3
|
||||
; CHECK-NEXT: vmov.32 q3[1], r3
|
||||
; CHECK-NEXT: vmov r3, s11
|
||||
; CHECK-NEXT: csetm r5, ne
|
||||
; CHECK-NEXT: vmov.32 q3[1], r5
|
||||
; CHECK-NEXT: subs.w r4, r4, r8
|
||||
; CHECK-NEXT: sbcs r3, r3, #0
|
||||
; CHECK-NEXT: mov.w r3, #0
|
||||
|
@ -367,7 +379,7 @@ define arm_aapcs_vfpcc void @ssatmul_4_q31(i32* nocapture readonly %pSrcA, i32*
|
|||
; CHECK-NEXT: movlt r3, #1
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: csetm r3, ne
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r3, r5
|
||||
; CHECK-NEXT: vmov.32 q3[2], r3
|
||||
; CHECK-NEXT: vbic q5, q1, q3
|
||||
; CHECK-NEXT: vand q2, q2, q3
|
||||
; CHECK-NEXT: vorr q2, q2, q5
|
||||
|
@ -388,25 +400,25 @@ define arm_aapcs_vfpcc void @ssatmul_4_q31(i32* nocapture readonly %pSrcA, i32*
|
|||
; CHECK-NEXT: mvn r2, #-2147483648
|
||||
; CHECK-NEXT: .LBB1_7: @ %for.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: ldr r1, [r12], #4
|
||||
; CHECK-NEXT: ldr r4, [r9], #4
|
||||
; CHECK-NEXT: smull r4, r1, r4, r1
|
||||
; CHECK-NEXT: asrl r4, r1, #31
|
||||
; CHECK-NEXT: subs r5, r3, r4
|
||||
; CHECK-NEXT: sbcs.w r5, r0, r1
|
||||
; CHECK-NEXT: mov.w r5, #0
|
||||
; CHECK-NEXT: ldr r4, [r12], #4
|
||||
; CHECK-NEXT: ldr r5, [r1], #4
|
||||
; CHECK-NEXT: smull r4, r5, r5, r4
|
||||
; CHECK-NEXT: asrl r4, r5, #31
|
||||
; CHECK-NEXT: subs r6, r3, r4
|
||||
; CHECK-NEXT: sbcs.w r6, r0, r5
|
||||
; CHECK-NEXT: mov.w r6, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r5, #1
|
||||
; CHECK-NEXT: cmp r5, #0
|
||||
; CHECK-NEXT: movlt r6, #1
|
||||
; CHECK-NEXT: cmp r6, #0
|
||||
; CHECK-NEXT: csel r4, r4, r3, ne
|
||||
; CHECK-NEXT: csel r1, r1, r0, ne
|
||||
; CHECK-NEXT: subs r5, r4, r2
|
||||
; CHECK-NEXT: sbcs r1, r1, #0
|
||||
; CHECK-NEXT: csel r1, r4, r2, lt
|
||||
; CHECK-NEXT: str r1, [r11], #4
|
||||
; CHECK-NEXT: csel r5, r5, r0, ne
|
||||
; CHECK-NEXT: subs r6, r4, r2
|
||||
; CHECK-NEXT: sbcs r5, r5, #0
|
||||
; CHECK-NEXT: csel r4, r4, r2, lt
|
||||
; CHECK-NEXT: str r4, [r11], #4
|
||||
; CHECK-NEXT: le lr, .LBB1_7
|
||||
; CHECK-NEXT: .LBB1_8: @ %for.cond.cleanup
|
||||
; CHECK-NEXT: add sp, #16
|
||||
; CHECK-NEXT: add sp, #8
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
|
||||
; CHECK-NEXT: add sp, #4
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||
|
@ -503,8 +515,10 @@ for.body: ; preds = %for.body.preheader2
|
|||
define arm_aapcs_vfpcc void @ssatmul_4t_q31(i32* nocapture readonly %pSrcA, i32* nocapture readonly %pSrcB, i32* noalias nocapture %pDst, i32 %N) {
|
||||
; CHECK-LABEL: ssatmul_4t_q31:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
|
||||
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr}
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr}
|
||||
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
|
||||
; CHECK-NEXT: .pad #4
|
||||
; CHECK-NEXT: sub sp, #4
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
|
||||
; CHECK-NEXT: .pad #16
|
||||
|
@ -513,19 +527,19 @@ define arm_aapcs_vfpcc void @ssatmul_4t_q31(i32* nocapture readonly %pSrcA, i32*
|
|||
; CHECK-NEXT: beq.w .LBB2_3
|
||||
; CHECK-NEXT: @ %bb.1: @ %vector.ph
|
||||
; CHECK-NEXT: adds r7, r3, #3
|
||||
; CHECK-NEXT: movs r6, #1
|
||||
; CHECK-NEXT: bic r7, r7, #3
|
||||
; CHECK-NEXT: adr r4, .LCPI2_1
|
||||
; CHECK-NEXT: bic r7, r7, #3
|
||||
; CHECK-NEXT: movs r6, #1
|
||||
; CHECK-NEXT: subs r7, #4
|
||||
; CHECK-NEXT: adr r5, .LCPI2_2
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r4]
|
||||
; CHECK-NEXT: vldrw.u32 q3, [r5]
|
||||
; CHECK-NEXT: adr r4, .LCPI2_2
|
||||
; CHECK-NEXT: mov.w r9, #0
|
||||
; CHECK-NEXT: add.w lr, r6, r7, lsr #2
|
||||
; CHECK-NEXT: adr r6, .LCPI2_0
|
||||
; CHECK-NEXT: subs r7, r3, #1
|
||||
; CHECK-NEXT: dls lr, lr
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r6]
|
||||
; CHECK-NEXT: mov.w r9, #0
|
||||
; CHECK-NEXT: vldrw.u32 q3, [r4]
|
||||
; CHECK-NEXT: vdup.32 q1, r7
|
||||
; CHECK-NEXT: mov.w r12, #-1
|
||||
; CHECK-NEXT: mvn r8, #-2147483648
|
||||
|
@ -549,59 +563,63 @@ define arm_aapcs_vfpcc void @ssatmul_4t_q31(i32* nocapture readonly %pSrcA, i32*
|
|||
; CHECK-NEXT: vmov r5, s1
|
||||
; CHECK-NEXT: vmov r6, s0
|
||||
; CHECK-NEXT: asrl r6, r5, #31
|
||||
; CHECK-NEXT: vmov r4, s2
|
||||
; CHECK-NEXT: rsbs.w r7, r6, #-2147483648
|
||||
; CHECK-NEXT: vmov.f32 s22, s21
|
||||
; CHECK-NEXT: sbcs.w r7, r12, r5
|
||||
; CHECK-NEXT: mov.w r7, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r7, #1
|
||||
; CHECK-NEXT: cmp r7, #0
|
||||
; CHECK-NEXT: vmov r7, s3
|
||||
; CHECK-NEXT: csetm r10, ne
|
||||
; CHECK-NEXT: rsbs.w r4, r6, #-2147483648
|
||||
; CHECK-NEXT: vmov.32 q7[0], r6
|
||||
; CHECK-NEXT: sbcs.w r4, r12, r5
|
||||
; CHECK-NEXT: vmov.32 q7[1], r5
|
||||
; CHECK-NEXT: mov.w r4, #0
|
||||
; CHECK-NEXT: vmov.f32 s22, s21
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r4, #1
|
||||
; CHECK-NEXT: cmp r4, #0
|
||||
; CHECK-NEXT: csetm r4, ne
|
||||
; CHECK-NEXT: vmov.32 q6[0], r4
|
||||
; CHECK-NEXT: vmov.32 q6[1], r4
|
||||
; CHECK-NEXT: vmov r4, s2
|
||||
; CHECK-NEXT: asrl r4, r7, #31
|
||||
; CHECK-NEXT: rsbs.w r3, r4, #-2147483648
|
||||
; CHECK-NEXT: vmov q7[2], q7[0], r4, r6
|
||||
; CHECK-NEXT: vmov.32 q7[2], r4
|
||||
; CHECK-NEXT: sbcs.w r3, r12, r7
|
||||
; CHECK-NEXT: vmov q7[3], q7[1], r7, r5
|
||||
; CHECK-NEXT: vmov.32 q7[3], r7
|
||||
; CHECK-NEXT: mov.w r3, #0
|
||||
; CHECK-NEXT: vmov r7, s22
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r3, #1
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: csetm r3, ne
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r3, r10
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r10
|
||||
; CHECK-NEXT: vbic q6, q2, q0
|
||||
; CHECK-NEXT: vand q0, q7, q0
|
||||
; CHECK-NEXT: vorr q6, q0, q6
|
||||
; CHECK-NEXT: vmov.32 q6[2], r3
|
||||
; CHECK-NEXT: vmov.32 q6[3], r3
|
||||
; CHECK-NEXT: vbic q0, q2, q6
|
||||
; CHECK-NEXT: vand q6, q7, q6
|
||||
; CHECK-NEXT: vorr q6, q6, q0
|
||||
; CHECK-NEXT: vmov r4, s24
|
||||
; CHECK-NEXT: vmov r3, s25
|
||||
; CHECK-NEXT: vmov r5, s26
|
||||
; CHECK-NEXT: subs.w r4, r4, r8
|
||||
; CHECK-NEXT: vmov r4, s27
|
||||
; CHECK-NEXT: sbcs r3, r3, #0
|
||||
; CHECK-NEXT: vmov r4, s26
|
||||
; CHECK-NEXT: mov.w r3, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r3, #1
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: csetm r3, ne
|
||||
; CHECK-NEXT: vmov.32 q0[0], r3
|
||||
; CHECK-NEXT: vmov.32 q0[1], r3
|
||||
; CHECK-NEXT: vmov r3, s27
|
||||
; CHECK-NEXT: subs.w r4, r4, r8
|
||||
; CHECK-NEXT: vmov r4, s20
|
||||
; CHECK-NEXT: sbcs r3, r3, #0
|
||||
; CHECK-NEXT: mov.w r3, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r3, #1
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: csetm r3, ne
|
||||
; CHECK-NEXT: subs.w r5, r5, r8
|
||||
; CHECK-NEXT: vmov.32 q0[1], r3
|
||||
; CHECK-NEXT: sbcs r4, r4, #0
|
||||
; CHECK-NEXT: mov.w r4, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r4, #1
|
||||
; CHECK-NEXT: cmp r4, #0
|
||||
; CHECK-NEXT: csetm r4, ne
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r4, r3
|
||||
; CHECK-NEXT: vmov.32 q0[2], r3
|
||||
; CHECK-NEXT: vmov r3, s16
|
||||
; CHECK-NEXT: vmov r4, s20
|
||||
; CHECK-NEXT: vbic q7, q3, q0
|
||||
; CHECK-NEXT: vand q0, q6, q0
|
||||
; CHECK-NEXT: vorr q6, q0, q7
|
||||
; CHECK-NEXT: smull r6, r5, r4, r3
|
||||
; CHECK-NEXT: vmov r4, s18
|
||||
; CHECK-NEXT: vmov r4, s22
|
||||
; CHECK-NEXT: asrl r6, r5, #31
|
||||
; CHECK-NEXT: rsbs.w r3, r6, #-2147483648
|
||||
; CHECK-NEXT: sbcs.w r3, r12, r5
|
||||
|
@ -609,43 +627,49 @@ define arm_aapcs_vfpcc void @ssatmul_4t_q31(i32* nocapture readonly %pSrcA, i32*
|
|||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r3, #1
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: csetm r10, ne
|
||||
; CHECK-NEXT: smull r4, r7, r7, r4
|
||||
; CHECK-NEXT: csetm r3, ne
|
||||
; CHECK-NEXT: vmov.32 q7[0], r3
|
||||
; CHECK-NEXT: vmov.32 q7[1], r3
|
||||
; CHECK-NEXT: vmov r3, s18
|
||||
; CHECK-NEXT: vmov.32 q4[0], r6
|
||||
; CHECK-NEXT: vmov.32 q4[1], r5
|
||||
; CHECK-NEXT: smull r4, r7, r4, r3
|
||||
; CHECK-NEXT: asrl r4, r7, #31
|
||||
; CHECK-NEXT: rsbs.w r3, r4, #-2147483648
|
||||
; CHECK-NEXT: vmov q5[2], q5[0], r4, r6
|
||||
; CHECK-NEXT: vmov.32 q4[2], r4
|
||||
; CHECK-NEXT: sbcs.w r3, r12, r7
|
||||
; CHECK-NEXT: vmov q5[3], q5[1], r7, r5
|
||||
; CHECK-NEXT: vmov.32 q4[3], r7
|
||||
; CHECK-NEXT: mov.w r3, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r3, #1
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: csetm r3, ne
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r3, r10
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r10
|
||||
; CHECK-NEXT: vbic q4, q2, q0
|
||||
; CHECK-NEXT: vand q0, q5, q0
|
||||
; CHECK-NEXT: vorr q4, q0, q4
|
||||
; CHECK-NEXT: vmov.32 q7[2], r3
|
||||
; CHECK-NEXT: vmov.32 q7[3], r3
|
||||
; CHECK-NEXT: vbic q0, q2, q7
|
||||
; CHECK-NEXT: vand q4, q4, q7
|
||||
; CHECK-NEXT: vorr q4, q4, q0
|
||||
; CHECK-NEXT: vmov r4, s16
|
||||
; CHECK-NEXT: vmov r3, s17
|
||||
; CHECK-NEXT: vmov r5, s18
|
||||
; CHECK-NEXT: subs.w r4, r4, r8
|
||||
; CHECK-NEXT: vmov r4, s19
|
||||
; CHECK-NEXT: sbcs r3, r3, #0
|
||||
; CHECK-NEXT: vmov r4, s18
|
||||
; CHECK-NEXT: mov.w r3, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r3, #1
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: csetm r3, ne
|
||||
; CHECK-NEXT: vmov.32 q0[0], r3
|
||||
; CHECK-NEXT: vmov.32 q0[1], r3
|
||||
; CHECK-NEXT: vmov r3, s19
|
||||
; CHECK-NEXT: subs.w r4, r4, r8
|
||||
; CHECK-NEXT: sbcs r3, r3, #0
|
||||
; CHECK-NEXT: mov.w r3, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r3, #1
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: csetm r3, ne
|
||||
; CHECK-NEXT: subs.w r5, r5, r8
|
||||
; CHECK-NEXT: vmov.32 q0[1], r3
|
||||
; CHECK-NEXT: sbcs r4, r4, #0
|
||||
; CHECK-NEXT: mov.w r4, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r4, #1
|
||||
; CHECK-NEXT: cmp r4, #0
|
||||
; CHECK-NEXT: csetm r4, ne
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r4, r3
|
||||
; CHECK-NEXT: vmov.32 q0[2], r3
|
||||
; CHECK-NEXT: vbic q5, q3, q0
|
||||
; CHECK-NEXT: vand q0, q4, q0
|
||||
; CHECK-NEXT: vorr q0, q0, q5
|
||||
|
@ -658,7 +682,8 @@ define arm_aapcs_vfpcc void @ssatmul_4t_q31(i32* nocapture readonly %pSrcA, i32*
|
|||
; CHECK-NEXT: .LBB2_3: @ %for.cond.cleanup
|
||||
; CHECK-NEXT: add sp, #16
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc}
|
||||
; CHECK-NEXT: add sp, #4
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
|
||||
; CHECK-NEXT: .p2align 4
|
||||
; CHECK-NEXT: @ %bb.4:
|
||||
; CHECK-NEXT: .LCPI2_0:
|
||||
|
@ -750,31 +775,33 @@ define arm_aapcs_vfpcc void @usatmul_2_q31(i32* nocapture readonly %pSrcA, i32*
|
|||
; CHECK-NEXT: dls lr, lr
|
||||
; CHECK-NEXT: .LBB3_4: @ %vector.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: ldrd r4, r7, [r0]
|
||||
; CHECK-NEXT: ldrd r4, r9, [r0]
|
||||
; CHECK-NEXT: adds r0, #8
|
||||
; CHECK-NEXT: ldrd r5, r10, [r1]
|
||||
; CHECK-NEXT: adds r1, #8
|
||||
; CHECK-NEXT: umull r4, r5, r5, r4
|
||||
; CHECK-NEXT: lsrl r4, r5, #31
|
||||
; CHECK-NEXT: subs.w r6, r4, #-1
|
||||
; CHECK-NEXT: umull r6, r7, r10, r7
|
||||
; CHECK-NEXT: vmov.32 q1[0], r4
|
||||
; CHECK-NEXT: sbcs r5, r5, #0
|
||||
; CHECK-NEXT: mov.w r5, #0
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r5, #1
|
||||
; CHECK-NEXT: cmp r5, #0
|
||||
; CHECK-NEXT: lsrl r6, r7, #31
|
||||
; CHECK-NEXT: csetm r9, ne
|
||||
; CHECK-NEXT: subs.w r5, r6, #-1
|
||||
; CHECK-NEXT: vmov.32 q0[1], r9
|
||||
; CHECK-NEXT: sbcs r5, r7, #0
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r6, r4
|
||||
; CHECK-NEXT: csetm r5, ne
|
||||
; CHECK-NEXT: vmov.32 q0[0], r5
|
||||
; CHECK-NEXT: vmov.32 q0[1], r5
|
||||
; CHECK-NEXT: umull r6, r5, r10, r9
|
||||
; CHECK-NEXT: lsrl r6, r5, #31
|
||||
; CHECK-NEXT: subs.w r7, r6, #-1
|
||||
; CHECK-NEXT: vmov.32 q1[2], r6
|
||||
; CHECK-NEXT: sbcs r5, r5, #0
|
||||
; CHECK-NEXT: mov.w r5, #0
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r5, #1
|
||||
; CHECK-NEXT: cmp r5, #0
|
||||
; CHECK-NEXT: csetm r5, ne
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r5, r9
|
||||
; CHECK-NEXT: vmov.32 q0[2], r5
|
||||
; CHECK-NEXT: vand q1, q1, q0
|
||||
; CHECK-NEXT: vorn q0, q1, q0
|
||||
; CHECK-NEXT: vmov r4, s2
|
||||
|
@ -879,10 +906,8 @@ for.body: ; preds = %for.body.preheader,
|
|||
define arm_aapcs_vfpcc void @usatmul_4_q31(i32* nocapture readonly %pSrcA, i32* nocapture readonly %pSrcB, i32* noalias nocapture %pDst, i32 %N) {
|
||||
; CHECK-LABEL: usatmul_4_q31:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
|
||||
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
|
||||
; CHECK-NEXT: .pad #4
|
||||
; CHECK-NEXT: sub sp, #4
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
|
||||
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
|
@ -918,53 +943,57 @@ define arm_aapcs_vfpcc void @usatmul_4_q31(i32* nocapture readonly %pSrcA, i32*
|
|||
; CHECK-NEXT: vmov r5, s17
|
||||
; CHECK-NEXT: vmov r4, s16
|
||||
; CHECK-NEXT: lsrl r4, r5, #31
|
||||
; CHECK-NEXT: vmov r7, s19
|
||||
; CHECK-NEXT: subs.w r6, r4, #-1
|
||||
; CHECK-NEXT: vmov.f32 s10, s9
|
||||
; CHECK-NEXT: subs.w r6, r4, #-1
|
||||
; CHECK-NEXT: vmov.32 q3[0], r4
|
||||
; CHECK-NEXT: sbcs r5, r5, #0
|
||||
; CHECK-NEXT: vmov r6, s18
|
||||
; CHECK-NEXT: mov.w r5, #0
|
||||
; CHECK-NEXT: lsrl r6, r7, #31
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r5, #1
|
||||
; CHECK-NEXT: cmp r5, #0
|
||||
; CHECK-NEXT: csetm r11, ne
|
||||
; CHECK-NEXT: subs.w r5, r6, #-1
|
||||
; CHECK-NEXT: sbcs r5, r7, #0
|
||||
; CHECK-NEXT: vmov.32 q1[1], r11
|
||||
; CHECK-NEXT: mov.w r5, #0
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r6, r4
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r5, #1
|
||||
; CHECK-NEXT: cmp r5, #0
|
||||
; CHECK-NEXT: csetm r5, ne
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r5, r11
|
||||
; CHECK-NEXT: vmov.32 q1[0], r5
|
||||
; CHECK-NEXT: vmov.32 q1[1], r5
|
||||
; CHECK-NEXT: vmov r5, s19
|
||||
; CHECK-NEXT: lsrl r6, r5, #31
|
||||
; CHECK-NEXT: subs.w r7, r6, #-1
|
||||
; CHECK-NEXT: vmov.32 q3[2], r6
|
||||
; CHECK-NEXT: sbcs r5, r5, #0
|
||||
; CHECK-NEXT: mov.w r5, #0
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r5, #1
|
||||
; CHECK-NEXT: cmp r5, #0
|
||||
; CHECK-NEXT: csetm r5, ne
|
||||
; CHECK-NEXT: vmov.32 q1[2], r5
|
||||
; CHECK-NEXT: vand q3, q3, q1
|
||||
; CHECK-NEXT: vorn q1, q3, q1
|
||||
; CHECK-NEXT: vmullb.u32 q3, q2, q0
|
||||
; CHECK-NEXT: vmov r5, s13
|
||||
; CHECK-NEXT: vmov r4, s12
|
||||
; CHECK-NEXT: lsrl r4, r5, #31
|
||||
; CHECK-NEXT: vmov r7, s15
|
||||
; CHECK-NEXT: subs.w r6, r4, #-1
|
||||
; CHECK-NEXT: vmov.32 q2[0], r4
|
||||
; CHECK-NEXT: sbcs r5, r5, #0
|
||||
; CHECK-NEXT: vmov r6, s14
|
||||
; CHECK-NEXT: mov.w r5, #0
|
||||
; CHECK-NEXT: lsrl r6, r7, #31
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r5, #1
|
||||
; CHECK-NEXT: cmp r5, #0
|
||||
; CHECK-NEXT: csetm r11, ne
|
||||
; CHECK-NEXT: subs.w r5, r6, #-1
|
||||
; CHECK-NEXT: sbcs r5, r7, #0
|
||||
; CHECK-NEXT: vmov.32 q0[1], r11
|
||||
; CHECK-NEXT: mov.w r5, #0
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r6, r4
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r5, #1
|
||||
; CHECK-NEXT: cmp r5, #0
|
||||
; CHECK-NEXT: csetm r5, ne
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r5, r11
|
||||
; CHECK-NEXT: vmov.32 q0[0], r5
|
||||
; CHECK-NEXT: vmov.32 q0[1], r5
|
||||
; CHECK-NEXT: vmov r5, s15
|
||||
; CHECK-NEXT: lsrl r6, r5, #31
|
||||
; CHECK-NEXT: subs.w r7, r6, #-1
|
||||
; CHECK-NEXT: vmov.32 q2[2], r6
|
||||
; CHECK-NEXT: sbcs r5, r5, #0
|
||||
; CHECK-NEXT: mov.w r5, #0
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r5, #1
|
||||
; CHECK-NEXT: cmp r5, #0
|
||||
; CHECK-NEXT: csetm r5, ne
|
||||
; CHECK-NEXT: vmov.32 q0[2], r5
|
||||
; CHECK-NEXT: vand q2, q2, q0
|
||||
; CHECK-NEXT: vorn q0, q2, q0
|
||||
; CHECK-NEXT: vmov.f32 s1, s2
|
||||
|
@ -992,8 +1021,7 @@ define arm_aapcs_vfpcc void @usatmul_4_q31(i32* nocapture readonly %pSrcA, i32*
|
|||
; CHECK-NEXT: le lr, .LBB4_7
|
||||
; CHECK-NEXT: .LBB4_8: @ %for.cond.cleanup
|
||||
; CHECK-NEXT: vpop {d8, d9}
|
||||
; CHECK-NEXT: add sp, #4
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc}
|
||||
entry:
|
||||
%cmp8 = icmp eq i32 %N, 0
|
||||
br i1 %cmp8, label %for.cond.cleanup, label %for.body.preheader
|
||||
|
@ -1563,12 +1591,12 @@ for.cond.cleanup: ; preds = %vector.body, %entry
|
|||
define arm_aapcs_vfpcc void @ssatmul_8t_q15(i16* nocapture readonly %pSrcA, i16* nocapture readonly %pSrcB, i16* noalias nocapture %pDst, i32 %N) {
|
||||
; CHECK-LABEL: ssatmul_8t_q15:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: .save {r4, lr}
|
||||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
|
||||
; CHECK-NEXT: .pad #16
|
||||
; CHECK-NEXT: sub sp, #16
|
||||
; CHECK-NEXT: .pad #24
|
||||
; CHECK-NEXT: sub sp, #24
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: beq.w .LBB9_3
|
||||
; CHECK-NEXT: @ %bb.1: @ %vector.ph
|
||||
|
@ -1579,99 +1607,107 @@ define arm_aapcs_vfpcc void @ssatmul_8t_q15(i16* nocapture readonly %pSrcA, i16*
|
|||
; CHECK-NEXT: sub.w r12, r12, #8
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r4]
|
||||
; CHECK-NEXT: adr r4, .LCPI9_1
|
||||
; CHECK-NEXT: movs r5, #0
|
||||
; CHECK-NEXT: vmov.i8 q2, #0x0
|
||||
; CHECK-NEXT: add.w lr, lr, r12, lsr #3
|
||||
; CHECK-NEXT: sub.w r12, r3, #1
|
||||
; CHECK-NEXT: dls lr, lr
|
||||
; CHECK-NEXT: vldrw.u32 q4, [r4]
|
||||
; CHECK-NEXT: movs r3, #0
|
||||
; CHECK-NEXT: vdup.32 q1, r12
|
||||
; CHECK-NEXT: vmov.i8 q2, #0x0
|
||||
; CHECK-NEXT: vmov.i8 q3, #0xff
|
||||
; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill
|
||||
; CHECK-NEXT: .LBB9_2: @ %vector.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vldrw.u32 q0, [sp] @ 16-byte Reload
|
||||
; CHECK-NEXT: vdup.32 q6, r5
|
||||
; CHECK-NEXT: adds r5, #8
|
||||
; CHECK-NEXT: vorr q5, q6, q0
|
||||
; CHECK-NEXT: vorr q6, q6, q4
|
||||
; CHECK-NEXT: vldrw.u32 q5, [sp] @ 16-byte Reload
|
||||
; CHECK-NEXT: vdup.32 q0, r3
|
||||
; CHECK-NEXT: adds r3, #8
|
||||
; CHECK-NEXT: vorr q5, q0, q5
|
||||
; CHECK-NEXT: vorr q0, q0, q4
|
||||
; CHECK-NEXT: vcmp.u32 cs, q1, q5
|
||||
; CHECK-NEXT: vpsel q7, q3, q2
|
||||
; CHECK-NEXT: vcmp.u32 cs, q1, q6
|
||||
; CHECK-NEXT: vmov r4, s28
|
||||
; CHECK-NEXT: vpsel q6, q3, q2
|
||||
; CHECK-NEXT: vmov.16 q5[0], r4
|
||||
; CHECK-NEXT: vmov r4, s29
|
||||
; CHECK-NEXT: vmov.16 q5[1], r4
|
||||
; CHECK-NEXT: vmov r4, s30
|
||||
; CHECK-NEXT: vmov.16 q5[2], r4
|
||||
; CHECK-NEXT: vmov r4, s31
|
||||
; CHECK-NEXT: vmov.16 q5[3], r4
|
||||
; CHECK-NEXT: vcmp.u32 cs, q1, q0
|
||||
; CHECK-NEXT: vmov r4, s24
|
||||
; CHECK-NEXT: vmov.16 q5[4], r4
|
||||
; CHECK-NEXT: vpsel q0, q3, q2
|
||||
; CHECK-NEXT: vmov.16 q5[0], r4
|
||||
; CHECK-NEXT: vmov r4, s25
|
||||
; CHECK-NEXT: vmov.16 q5[5], r4
|
||||
; CHECK-NEXT: vmov.16 q5[1], r4
|
||||
; CHECK-NEXT: vmov r4, s26
|
||||
; CHECK-NEXT: vmov.16 q5[6], r4
|
||||
; CHECK-NEXT: vmov.16 q5[2], r4
|
||||
; CHECK-NEXT: vmov r4, s27
|
||||
; CHECK-NEXT: vmov.16 q5[3], r4
|
||||
; CHECK-NEXT: vmov r4, s0
|
||||
; CHECK-NEXT: vmov.16 q5[4], r4
|
||||
; CHECK-NEXT: vmov r4, s1
|
||||
; CHECK-NEXT: vmov.16 q5[5], r4
|
||||
; CHECK-NEXT: vmov r4, s2
|
||||
; CHECK-NEXT: vmov.16 q5[6], r4
|
||||
; CHECK-NEXT: vmov r4, s3
|
||||
; CHECK-NEXT: vmov.16 q5[7], r4
|
||||
; CHECK-NEXT: vpt.i16 ne, q5, zr
|
||||
; CHECK-NEXT: vldrht.u16 q6, [r0], #16
|
||||
; CHECK-NEXT: vmov.u16 r4, q6[0]
|
||||
; CHECK-NEXT: vmov.u16 r3, q6[2]
|
||||
; CHECK-NEXT: vmov q5[2], q5[0], r3, r4
|
||||
; CHECK-NEXT: vmov.u16 r3, q6[1]
|
||||
; CHECK-NEXT: vmov.u16 r4, q6[3]
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vldrht.u16 q7, [r1], #16
|
||||
; CHECK-NEXT: vmov q5[3], q5[1], r4, r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q7[0]
|
||||
; CHECK-NEXT: vmov.32 q5[0], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q6[1]
|
||||
; CHECK-NEXT: vmov.32 q5[1], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q6[2]
|
||||
; CHECK-NEXT: vmov.32 q5[2], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q6[3]
|
||||
; CHECK-NEXT: vmov.32 q5[3], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q7[0]
|
||||
; CHECK-NEXT: vmov.32 q0[0], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q7[1]
|
||||
; CHECK-NEXT: vmov.32 q0[1], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q7[2]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r4, r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q7[1]
|
||||
; CHECK-NEXT: vmov.32 q0[2], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q7[3]
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r4, r3
|
||||
; CHECK-NEXT: vmov.u16 r4, q6[6]
|
||||
; CHECK-NEXT: vmov.32 q0[3], r4
|
||||
; CHECK-NEXT: vmullb.s16 q0, q0, q5
|
||||
; CHECK-NEXT: vqshrnb.s32 q0, q0, #15
|
||||
; CHECK-NEXT: vmovlb.s16 q0, q0
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov.16 q5[0], r3
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: vmov.16 q5[1], r3
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: vmov.16 q5[2], r3
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: vmov.16 q5[3], r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q6[4]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r4, r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q6[5]
|
||||
; CHECK-NEXT: vmov r4, s0
|
||||
; CHECK-NEXT: vmov.16 q5[0], r4
|
||||
; CHECK-NEXT: vmov r4, s1
|
||||
; CHECK-NEXT: vmov.16 q5[1], r4
|
||||
; CHECK-NEXT: vmov r4, s2
|
||||
; CHECK-NEXT: vmov.16 q5[2], r4
|
||||
; CHECK-NEXT: vmov r4, s3
|
||||
; CHECK-NEXT: vmov.16 q5[3], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q6[4]
|
||||
; CHECK-NEXT: vmov.32 q0[0], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q6[5]
|
||||
; CHECK-NEXT: vmov.32 q0[1], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q6[6]
|
||||
; CHECK-NEXT: vmov.32 q0[2], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q6[7]
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r4, r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q7[4]
|
||||
; CHECK-NEXT: vmov.32 q0[3], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q7[4]
|
||||
; CHECK-NEXT: vmov.32 q6[0], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q7[5]
|
||||
; CHECK-NEXT: vmov.32 q6[1], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q7[6]
|
||||
; CHECK-NEXT: vmov q6[2], q6[0], r4, r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q7[5]
|
||||
; CHECK-NEXT: vmov.32 q6[2], r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q7[7]
|
||||
; CHECK-NEXT: vmov q6[3], q6[1], r4, r3
|
||||
; CHECK-NEXT: vmov.32 q6[3], r4
|
||||
; CHECK-NEXT: vmullb.s16 q0, q6, q0
|
||||
; CHECK-NEXT: vqshrnb.s32 q0, q0, #15
|
||||
; CHECK-NEXT: vmovlb.s16 q0, q0
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov.16 q5[4], r3
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: vmov.16 q5[5], r3
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: vmov.16 q5[6], r3
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: vmov.16 q5[7], r3
|
||||
; CHECK-NEXT: vmov r4, s0
|
||||
; CHECK-NEXT: vmov.16 q5[4], r4
|
||||
; CHECK-NEXT: vmov r4, s1
|
||||
; CHECK-NEXT: vmov.16 q5[5], r4
|
||||
; CHECK-NEXT: vmov r4, s2
|
||||
; CHECK-NEXT: vmov.16 q5[6], r4
|
||||
; CHECK-NEXT: vmov r4, s3
|
||||
; CHECK-NEXT: vmov.16 q5[7], r4
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vstrht.16 q5, [r2], #16
|
||||
; CHECK-NEXT: le lr, .LBB9_2
|
||||
; CHECK-NEXT: .LBB9_3: @ %for.cond.cleanup
|
||||
; CHECK-NEXT: add sp, #16
|
||||
; CHECK-NEXT: add sp, #24
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
; CHECK-NEXT: pop {r4, pc}
|
||||
; CHECK-NEXT: .p2align 4
|
||||
; CHECK-NEXT: @ %bb.4:
|
||||
; CHECK-NEXT: .LCPI9_0:
|
||||
|
|
|
@ -34,13 +34,12 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @sadd_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
|
||||
; CHECK-LABEL: sadd_int64_t:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: .save {r4, lr}
|
||||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: vmov lr, s4
|
||||
; CHECK-NEXT: vmov r5, s6
|
||||
; CHECK-NEXT: vmov r6, s2
|
||||
; CHECK-NEXT: vmov r4, s2
|
||||
; CHECK-NEXT: cmp.w r0, #-1
|
||||
; CHECK-NEXT: cset r1, gt
|
||||
; CHECK-NEXT: cmp.w r2, #-1
|
||||
|
@ -49,49 +48,53 @@ define arm_aapcs_vfpcc <2 x i64> @sadd_int64_t(<2 x i64> %src1, <2 x i64> %src2)
|
|||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: cset r12, eq
|
||||
; CHECK-NEXT: adds.w r1, r1, lr
|
||||
; CHECK-NEXT: adcs r2, r0
|
||||
; CHECK-NEXT: cmp.w r2, #-1
|
||||
; CHECK-NEXT: cset r0, gt
|
||||
; CHECK-NEXT: cmp r3, r0
|
||||
; CHECK-NEXT: cset r0, ne
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: and.w r0, r0, r12
|
||||
; CHECK-NEXT: mvn r12, #-2147483648
|
||||
; CHECK-NEXT: and r3, r0, #1
|
||||
; CHECK-NEXT: cset r0, mi
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: cinv r0, r12, eq
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: asrne r1, r2, #31
|
||||
; CHECK-NEXT: csel r0, r0, r2, ne
|
||||
; CHECK-NEXT: vmov.32 q2[0], r1
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: vmov.32 q2[1], r0
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: cmp.w r0, #-1
|
||||
; CHECK-NEXT: cset r1, gt
|
||||
; CHECK-NEXT: cmp.w r2, #-1
|
||||
; CHECK-NEXT: cset r3, gt
|
||||
; CHECK-NEXT: cmp r3, r1
|
||||
; CHECK-NEXT: vmov r1, s6
|
||||
; CHECK-NEXT: cset lr, eq
|
||||
; CHECK-NEXT: adds r1, r1, r4
|
||||
; CHECK-NEXT: adcs r0, r2
|
||||
; CHECK-NEXT: cmp.w r0, #-1
|
||||
; CHECK-NEXT: cset r2, gt
|
||||
; CHECK-NEXT: cmp r3, r2
|
||||
; CHECK-NEXT: vmov r3, s7
|
||||
; CHECK-NEXT: cset r2, ne
|
||||
; CHECK-NEXT: and.w r2, r2, r12
|
||||
; CHECK-NEXT: ands r12, r2, #1
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: and.w r2, r2, lr
|
||||
; CHECK-NEXT: ands r2, r2, #1
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: asrne r1, r0, #31
|
||||
; CHECK-NEXT: cmp.w r3, #-1
|
||||
; CHECK-NEXT: cset lr, gt
|
||||
; CHECK-NEXT: cmp.w r2, #-1
|
||||
; CHECK-NEXT: cset r4, gt
|
||||
; CHECK-NEXT: cmp r4, lr
|
||||
; CHECK-NEXT: cset lr, eq
|
||||
; CHECK-NEXT: adds r5, r5, r6
|
||||
; CHECK-NEXT: adcs r2, r3
|
||||
; CHECK-NEXT: mvn r6, #-2147483648
|
||||
; CHECK-NEXT: cmp.w r2, #-1
|
||||
; CHECK-NEXT: cset r3, gt
|
||||
; CHECK-NEXT: cmp r4, r3
|
||||
; CHECK-NEXT: cset r3, ne
|
||||
; CHECK-NEXT: and.w r3, r3, lr
|
||||
; CHECK-NEXT: ands r3, r3, #1
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: asrne r5, r2, #31
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r5, r1
|
||||
; CHECK-NEXT: vmov.32 q2[2], r1
|
||||
; CHECK-NEXT: cset r1, mi
|
||||
; CHECK-NEXT: tst.w r1, #1
|
||||
; CHECK-NEXT: cinv r1, r6, eq
|
||||
; CHECK-NEXT: cmp.w r12, #0
|
||||
; CHECK-NEXT: csel r0, r1, r0, ne
|
||||
; CHECK-NEXT: cinv r1, r12, eq
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: cset r1, mi
|
||||
; CHECK-NEXT: tst.w r1, #1
|
||||
; CHECK-NEXT: cinv r1, r6, eq
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: csel r1, r1, r2, ne
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
|
||||
; CHECK-NEXT: pop {r4, r5, r6, pc}
|
||||
; CHECK-NEXT: csel r0, r1, r0, ne
|
||||
; CHECK-NEXT: vmov.32 q2[3], r0
|
||||
; CHECK-NEXT: vmov q0, q2
|
||||
; CHECK-NEXT: pop {r4, pc}
|
||||
entry:
|
||||
%0 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %src1, <2 x i64> %src2)
|
||||
ret <2 x i64> %0
|
||||
|
@ -130,36 +133,34 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @uadd_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
|
||||
; CHECK-LABEL: uadd_int64_t:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vmov r2, s4
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r4, s6
|
||||
; CHECK-NEXT: vmov r5, s2
|
||||
; CHECK-NEXT: adds r2, r2, r3
|
||||
; CHECK-NEXT: vmov r3, s7
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: adcs r0, r1
|
||||
; CHECK-NEXT: adcs r1, r12, #0
|
||||
; CHECK-NEXT: itt ne
|
||||
; CHECK-NEXT: movne.w r0, #-1
|
||||
; CHECK-NEXT: movne.w r2, #-1
|
||||
; CHECK-NEXT: vmov.32 q2[0], r2
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: vmov.32 q2[1], r0
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: adcs lr, r12, #0
|
||||
; CHECK-NEXT: adds r2, r2, r3
|
||||
; CHECK-NEXT: adcs r0, r1
|
||||
; CHECK-NEXT: adcs r1, r12, #0
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: movne.w r2, #-1
|
||||
; CHECK-NEXT: adds r4, r4, r5
|
||||
; CHECK-NEXT: adcs r1, r3
|
||||
; CHECK-NEXT: adcs r3, r12, #0
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: movne.w r4, #-1
|
||||
; CHECK-NEXT: cmp.w lr, #0
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r4, r2
|
||||
; CHECK-NEXT: vmov.32 q2[2], r2
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: movne.w r0, #-1
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: movne.w r1, #-1
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
; CHECK-NEXT: vmov.32 q2[3], r0
|
||||
; CHECK-NEXT: vmov q0, q2
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> %src1, <2 x i64> %src2)
|
||||
ret <2 x i64> %0
|
||||
|
@ -199,13 +200,12 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @ssub_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
|
||||
; CHECK-LABEL: ssub_int64_t:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: .save {r4, lr}
|
||||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: vmov lr, s4
|
||||
; CHECK-NEXT: vmov r5, s6
|
||||
; CHECK-NEXT: vmov r6, s2
|
||||
; CHECK-NEXT: vmov r4, s2
|
||||
; CHECK-NEXT: cmp.w r0, #-1
|
||||
; CHECK-NEXT: cset r1, gt
|
||||
; CHECK-NEXT: cmp.w r2, #-1
|
||||
|
@ -214,49 +214,53 @@ define arm_aapcs_vfpcc <2 x i64> @ssub_int64_t(<2 x i64> %src1, <2 x i64> %src2)
|
|||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: cset r12, ne
|
||||
; CHECK-NEXT: subs.w r1, r1, lr
|
||||
; CHECK-NEXT: sbcs r2, r0
|
||||
; CHECK-NEXT: cmp.w r2, #-1
|
||||
; CHECK-NEXT: cset r0, gt
|
||||
; CHECK-NEXT: cmp r3, r0
|
||||
; CHECK-NEXT: cset r0, ne
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: and.w r0, r0, r12
|
||||
; CHECK-NEXT: mvn r12, #-2147483648
|
||||
; CHECK-NEXT: and r3, r0, #1
|
||||
; CHECK-NEXT: cset r0, mi
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: cinv r0, r12, eq
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: asrne r1, r2, #31
|
||||
; CHECK-NEXT: csel r0, r0, r2, ne
|
||||
; CHECK-NEXT: vmov.32 q2[0], r1
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: vmov.32 q2[1], r0
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: cmp.w r0, #-1
|
||||
; CHECK-NEXT: cset r1, gt
|
||||
; CHECK-NEXT: cmp.w r2, #-1
|
||||
; CHECK-NEXT: cset r3, gt
|
||||
; CHECK-NEXT: cmp r3, r1
|
||||
; CHECK-NEXT: vmov r1, s6
|
||||
; CHECK-NEXT: cset lr, ne
|
||||
; CHECK-NEXT: subs r1, r4, r1
|
||||
; CHECK-NEXT: sbc.w r0, r2, r0
|
||||
; CHECK-NEXT: cmp.w r0, #-1
|
||||
; CHECK-NEXT: cset r2, gt
|
||||
; CHECK-NEXT: cmp r3, r2
|
||||
; CHECK-NEXT: vmov r3, s7
|
||||
; CHECK-NEXT: cset r2, ne
|
||||
; CHECK-NEXT: and.w r2, r2, r12
|
||||
; CHECK-NEXT: ands r12, r2, #1
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: and.w r2, r2, lr
|
||||
; CHECK-NEXT: ands r2, r2, #1
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: asrne r1, r0, #31
|
||||
; CHECK-NEXT: cmp.w r3, #-1
|
||||
; CHECK-NEXT: cset lr, gt
|
||||
; CHECK-NEXT: cmp.w r2, #-1
|
||||
; CHECK-NEXT: cset r4, gt
|
||||
; CHECK-NEXT: cmp r4, lr
|
||||
; CHECK-NEXT: cset lr, ne
|
||||
; CHECK-NEXT: subs r5, r6, r5
|
||||
; CHECK-NEXT: sbcs r2, r3
|
||||
; CHECK-NEXT: mvn r6, #-2147483648
|
||||
; CHECK-NEXT: cmp.w r2, #-1
|
||||
; CHECK-NEXT: cset r3, gt
|
||||
; CHECK-NEXT: cmp r4, r3
|
||||
; CHECK-NEXT: cset r3, ne
|
||||
; CHECK-NEXT: and.w r3, r3, lr
|
||||
; CHECK-NEXT: ands r3, r3, #1
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: asrne r5, r2, #31
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r5, r1
|
||||
; CHECK-NEXT: vmov.32 q2[2], r1
|
||||
; CHECK-NEXT: cset r1, mi
|
||||
; CHECK-NEXT: tst.w r1, #1
|
||||
; CHECK-NEXT: cinv r1, r6, eq
|
||||
; CHECK-NEXT: cmp.w r12, #0
|
||||
; CHECK-NEXT: csel r0, r1, r0, ne
|
||||
; CHECK-NEXT: cinv r1, r12, eq
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: cset r1, mi
|
||||
; CHECK-NEXT: tst.w r1, #1
|
||||
; CHECK-NEXT: cinv r1, r6, eq
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: csel r1, r1, r2, ne
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
|
||||
; CHECK-NEXT: pop {r4, r5, r6, pc}
|
||||
; CHECK-NEXT: csel r0, r1, r0, ne
|
||||
; CHECK-NEXT: vmov.32 q2[3], r0
|
||||
; CHECK-NEXT: vmov q0, q2
|
||||
; CHECK-NEXT: pop {r4, pc}
|
||||
entry:
|
||||
%0 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %src1, <2 x i64> %src2)
|
||||
ret <2 x i64> %0
|
||||
|
@ -295,38 +299,36 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @usub_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
|
||||
; CHECK-LABEL: usub_int64_t:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vmov r2, s4
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r4, s6
|
||||
; CHECK-NEXT: vmov r5, s2
|
||||
; CHECK-NEXT: subs r2, r3, r2
|
||||
; CHECK-NEXT: vmov r3, s7
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: sbcs.w r0, r1, r0
|
||||
; CHECK-NEXT: adc r1, r12, #0
|
||||
; CHECK-NEXT: rsbs.w lr, r1, #1
|
||||
; CHECK-NEXT: rsbs.w r1, r1, #1
|
||||
; CHECK-NEXT: itt ne
|
||||
; CHECK-NEXT: movne r0, #0
|
||||
; CHECK-NEXT: movne r2, #0
|
||||
; CHECK-NEXT: vmov.32 q2[0], r2
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: vmov.32 q2[1], r0
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: subs r2, r3, r2
|
||||
; CHECK-NEXT: sbcs.w r0, r1, r0
|
||||
; CHECK-NEXT: adc r1, r12, #0
|
||||
; CHECK-NEXT: rsbs.w r1, r1, #1
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: movne r2, #0
|
||||
; CHECK-NEXT: subs r4, r5, r4
|
||||
; CHECK-NEXT: sbcs r1, r3
|
||||
; CHECK-NEXT: adc r3, r12, #0
|
||||
; CHECK-NEXT: rsbs.w r3, r3, #1
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: movne r4, #0
|
||||
; CHECK-NEXT: cmp.w lr, #0
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r4, r2
|
||||
; CHECK-NEXT: vmov.32 q2[2], r2
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: movne r0, #0
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: it ne
|
||||
; CHECK-NEXT: movne r1, #0
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
; CHECK-NEXT: vmov.32 q2[3], r0
|
||||
; CHECK-NEXT: vmov q0, q2
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> %src1, <2 x i64> %src2)
|
||||
ret <2 x i64> %0
|
||||
|
|
|
@ -64,8 +64,9 @@ define arm_aapcs_vfpcc void @unscaled_v2i8_i8(i8* %base, <2 x i8>* %offptr, <2 x
|
|||
; CHECK-NEXT: ldrb r2, [r1]
|
||||
; CHECK-NEXT: vmov.i32 q1, #0xff
|
||||
; CHECK-NEXT: ldrb r1, [r1, #1]
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r1, r2
|
||||
; CHECK-NEXT: vmov.32 q2[0], r2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov.32 q2[2], r1
|
||||
; CHECK-NEXT: vand q1, q2, q1
|
||||
; CHECK-NEXT: vmov r1, s4
|
||||
; CHECK-NEXT: strb r2, [r0, r1]
|
||||
|
|
|
@ -63,11 +63,14 @@ define arm_aapcs_vfpcc <2 x i64> @sext_v2i64_v2i64_v2i32(<2 x i64> %m) {
|
|||
; CHECK-LABEL: sext_v2i64_v2i64_v2i32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q1[0], r0
|
||||
; CHECK-NEXT: asrs r0, r0, #31
|
||||
; CHECK-NEXT: asrs r1, r1, #31
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q1[1], r0
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: vmov.32 q1[2], r0
|
||||
; CHECK-NEXT: asrs r0, r0, #31
|
||||
; CHECK-NEXT: vmov.32 q1[3], r0
|
||||
; CHECK-NEXT: vmov q0, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%shl = shl <2 x i64> %m, <i64 32, i64 32>
|
||||
|
@ -79,13 +82,15 @@ define arm_aapcs_vfpcc <2 x i64> @sext_v2i64_v2i64_v2i35(<2 x i64> %m) {
|
|||
; CHECK-LABEL: sext_v2i64_v2i64_v2i35:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q1[0], r0
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: sbfx r0, r0, #0, #3
|
||||
; CHECK-NEXT: sbfx r1, r1, #0, #3
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q1[1], r0
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: vmov.32 q1[2], r0
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: sbfx r0, r0, #0, #3
|
||||
; CHECK-NEXT: vmov.32 q1[3], r0
|
||||
; CHECK-NEXT: vmov q0, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
|
@ -173,19 +178,23 @@ define arm_aapcs_vfpcc <8 x i32> @sext_v8i16_v8i32(<8 x i16> %src) {
|
|||
; CHECK-LABEL: sext_v8i16_v8i32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[0]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[2]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q1[0], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[1]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[3]
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q1[1], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[2]
|
||||
; CHECK-NEXT: vmov.32 q1[2], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[3]
|
||||
; CHECK-NEXT: vmov.32 q1[3], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[4]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[6]
|
||||
; CHECK-NEXT: vmovlb.s16 q2, q1
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q1[0], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[5]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[7]
|
||||
; CHECK-NEXT: vmov.32 q1[1], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[6]
|
||||
; CHECK-NEXT: vmov.32 q1[2], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[7]
|
||||
; CHECK-NEXT: vmov.32 q1[3], r0
|
||||
; CHECK-NEXT: vmov q0, q2
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
|
||||
; CHECK-NEXT: vmovlb.s16 q1, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
|
@ -199,37 +208,45 @@ define arm_aapcs_vfpcc <16 x i32> @sext_v16i8_v16i32(<16 x i8> %src) {
|
|||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[0]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[2]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q1[0], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[1]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[3]
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q1[1], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[2]
|
||||
; CHECK-NEXT: vmov.32 q1[2], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[3]
|
||||
; CHECK-NEXT: vmov.32 q1[3], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[4]
|
||||
; CHECK-NEXT: vmovlb.s8 q1, q1
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[6]
|
||||
; CHECK-NEXT: vmovlb.s16 q4, q1
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q1[0], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[5]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[7]
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q1[1], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[6]
|
||||
; CHECK-NEXT: vmov.32 q1[2], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[7]
|
||||
; CHECK-NEXT: vmov.32 q1[3], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[8]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[10]
|
||||
; CHECK-NEXT: vmovlb.s8 q1, q1
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q2[0], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[9]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[11]
|
||||
; CHECK-NEXT: vmovlb.s16 q1, q1
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q2[1], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[10]
|
||||
; CHECK-NEXT: vmov.32 q2[2], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[11]
|
||||
; CHECK-NEXT: vmov.32 q2[3], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[12]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[14]
|
||||
; CHECK-NEXT: vmovlb.s8 q2, q2
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q3[0], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[13]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[15]
|
||||
; CHECK-NEXT: vmovlb.s16 q2, q2
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q3[1], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[14]
|
||||
; CHECK-NEXT: vmov.32 q3[2], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[15]
|
||||
; CHECK-NEXT: vmov.32 q3[3], r0
|
||||
; CHECK-NEXT: vmovlb.s8 q1, q1
|
||||
; CHECK-NEXT: vmovlb.s8 q2, q2
|
||||
; CHECK-NEXT: vmovlb.s8 q0, q3
|
||||
; CHECK-NEXT: vmovlb.s16 q3, q0
|
||||
; CHECK-NEXT: vmovlb.s16 q1, q1
|
||||
; CHECK-NEXT: vmovlb.s16 q2, q2
|
||||
; CHECK-NEXT: vmov q0, q4
|
||||
; CHECK-NEXT: vpop {d8, d9}
|
||||
; CHECK-NEXT: bx lr
|
||||
|
@ -242,11 +259,14 @@ define arm_aapcs_vfpcc <2 x i64> @sext_v2i32_v2i64(<2 x i32> %src) {
|
|||
; CHECK-LABEL: sext_v2i32_v2i64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q1[0], r0
|
||||
; CHECK-NEXT: asrs r0, r0, #31
|
||||
; CHECK-NEXT: asrs r1, r1, #31
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q1[1], r0
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: vmov.32 q1[2], r0
|
||||
; CHECK-NEXT: asrs r0, r0, #31
|
||||
; CHECK-NEXT: vmov.32 q1[3], r0
|
||||
; CHECK-NEXT: vmov q0, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = sext <2 x i32> %src to <2 x i64>
|
||||
|
@ -333,19 +353,23 @@ define arm_aapcs_vfpcc <8 x i32> @zext_v8i16_v8i32(<8 x i16> %src) {
|
|||
; CHECK-LABEL: zext_v8i16_v8i32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[0]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[2]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q1[0], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[1]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[3]
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q1[1], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[2]
|
||||
; CHECK-NEXT: vmov.32 q1[2], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[3]
|
||||
; CHECK-NEXT: vmov.32 q1[3], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[4]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[6]
|
||||
; CHECK-NEXT: vmovlb.u16 q2, q1
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q1[0], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[5]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[7]
|
||||
; CHECK-NEXT: vmov.32 q1[1], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[6]
|
||||
; CHECK-NEXT: vmov.32 q1[2], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[7]
|
||||
; CHECK-NEXT: vmov.32 q1[3], r0
|
||||
; CHECK-NEXT: vmov q0, q2
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
|
||||
; CHECK-NEXT: vmovlb.u16 q1, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
|
@ -359,35 +383,43 @@ define arm_aapcs_vfpcc <16 x i32> @zext_v16i8_v16i32(<16 x i8> %src) {
|
|||
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[0]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[2]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[1]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[3]
|
||||
; CHECK-NEXT: vmov.i32 q3, #0xff
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q1[0], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[1]
|
||||
; CHECK-NEXT: vmov.32 q1[1], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[2]
|
||||
; CHECK-NEXT: vmov.32 q1[2], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[3]
|
||||
; CHECK-NEXT: vmov.32 q1[3], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[4]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[6]
|
||||
; CHECK-NEXT: vand q4, q1, q3
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q1[0], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[5]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[7]
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q1[1], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[6]
|
||||
; CHECK-NEXT: vmov.32 q1[2], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[7]
|
||||
; CHECK-NEXT: vmov.32 q1[3], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[8]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[10]
|
||||
; CHECK-NEXT: vand q1, q1, q3
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q2[0], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[9]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[11]
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q2[1], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[10]
|
||||
; CHECK-NEXT: vmov.32 q2[2], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[11]
|
||||
; CHECK-NEXT: vmov.32 q2[3], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[12]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[14]
|
||||
; CHECK-NEXT: vand q2, q2, q3
|
||||
; CHECK-NEXT: vmov q5[2], q5[0], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q5[0], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[13]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[15]
|
||||
; CHECK-NEXT: vmov q0, q4
|
||||
; CHECK-NEXT: vmov q5[3], q5[1], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q5[1], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[14]
|
||||
; CHECK-NEXT: vmov.32 q5[2], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[15]
|
||||
; CHECK-NEXT: vmov.32 q5[3], r0
|
||||
; CHECK-NEXT: vand q1, q1, q3
|
||||
; CHECK-NEXT: vand q2, q2, q3
|
||||
; CHECK-NEXT: vand q3, q5, q3
|
||||
; CHECK-NEXT: vmov q0, q4
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
|
|
|
@ -38,12 +38,15 @@ define arm_aapcs_vfpcc <2 x i64> @shl_qq_int64_t(<2 x i64> %src1, <2 x i64> %src
|
|||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: lsll r2, r1, r0
|
||||
; CHECK-NEXT: vmov r12, s6
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: lsll r0, r3, r12
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, r2
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r1
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov.32 q2[0], r2
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov.32 q2[1], r1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: lsll r2, r1, r0
|
||||
; CHECK-NEXT: vmov.32 q2[2], r2
|
||||
; CHECK-NEXT: vmov.32 q2[3], r1
|
||||
; CHECK-NEXT: vmov q0, q2
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = shl <2 x i64> %src1, %src2
|
||||
|
@ -87,21 +90,22 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @shru_qq_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
|
||||
; CHECK-LABEL: shru_qq_int64_t:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r5, lr}
|
||||
; CHECK-NEXT: push {r5, lr}
|
||||
; CHECK-NEXT: vmov r2, s4
|
||||
; CHECK-NEXT: vmov r1, s6
|
||||
; CHECK-NEXT: vmov r5, s1
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-NEXT: lsll r0, r5, r2
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: lsll r2, r3, r1
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, r0
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r5
|
||||
; CHECK-NEXT: pop {r5, pc}
|
||||
; CHECK-NEXT: lsll r0, r1, r2
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: vmov.32 q2[0], r0
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: vmov.32 q2[1], r1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-NEXT: lsll r0, r1, r2
|
||||
; CHECK-NEXT: vmov.32 q2[2], r0
|
||||
; CHECK-NEXT: vmov.32 q2[3], r1
|
||||
; CHECK-NEXT: vmov q0, q2
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = lshr <2 x i64> %src1, %src2
|
||||
ret <2 x i64> %0
|
||||
|
@ -148,12 +152,15 @@ define arm_aapcs_vfpcc <2 x i64> @shrs_qq_int64_t(<2 x i64> %src1, <2 x i64> %sr
|
|||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: asrl r2, r1, r0
|
||||
; CHECK-NEXT: vmov r12, s6
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: asrl r0, r3, r12
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, r2
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r1
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov.32 q2[0], r2
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov.32 q2[1], r1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: asrl r2, r1, r0
|
||||
; CHECK-NEXT: vmov.32 q2[2], r2
|
||||
; CHECK-NEXT: vmov.32 q2[3], r1
|
||||
; CHECK-NEXT: vmov q0, q2
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = ashr <2 x i64> %src1, %src2
|
||||
|
@ -196,12 +203,15 @@ define arm_aapcs_vfpcc <2 x i64> @shl_qi_int64_t(<2 x i64> %src1) {
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: lsll r0, r1, #4
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: lsll r2, r3, #4
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, r0
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r1
|
||||
; CHECK-NEXT: vmov.32 q1[0], r0
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: vmov.32 q1[1], r1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: lsll r0, r1, #4
|
||||
; CHECK-NEXT: vmov.32 q1[2], r0
|
||||
; CHECK-NEXT: vmov.32 q1[3], r1
|
||||
; CHECK-NEXT: vmov q0, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = shl <2 x i64> %src1, <i64 4, i64 4>
|
||||
|
@ -244,12 +254,15 @@ define arm_aapcs_vfpcc <2 x i64> @shru_qi_int64_t(<2 x i64> %src1) {
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: lsrl r0, r1, #4
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: lsrl r2, r3, #4
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, r0
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r1
|
||||
; CHECK-NEXT: vmov.32 q1[0], r0
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: vmov.32 q1[1], r1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: lsrl r0, r1, #4
|
||||
; CHECK-NEXT: vmov.32 q1[2], r0
|
||||
; CHECK-NEXT: vmov.32 q1[3], r1
|
||||
; CHECK-NEXT: vmov q0, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = lshr <2 x i64> %src1, <i64 4, i64 4>
|
||||
|
@ -292,12 +305,15 @@ define arm_aapcs_vfpcc <2 x i64> @shrs_qi_int64_t(<2 x i64> %src1) {
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: asrl r0, r1, #4
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: asrl r2, r3, #4
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, r0
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r1
|
||||
; CHECK-NEXT: vmov.32 q1[0], r0
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: vmov.32 q1[1], r1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: asrl r0, r1, #4
|
||||
; CHECK-NEXT: vmov.32 q1[2], r0
|
||||
; CHECK-NEXT: vmov.32 q1[3], r1
|
||||
; CHECK-NEXT: vmov q0, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = ashr <2 x i64> %src1, <i64 4, i64 4>
|
||||
|
@ -345,13 +361,16 @@ define arm_aapcs_vfpcc <2 x i64> @shl_qr_int64_t(<2 x i64> %src1, i64 %src2) {
|
|||
; CHECK-LABEL: shl_qr_int64_t:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r12, s0
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: lsll r12, r1, r0
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: lsll r2, r1, r0
|
||||
; CHECK-NEXT: vmov.32 q1[0], r2
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: lsll r2, r3, r0
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, r12
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r1
|
||||
; CHECK-NEXT: vmov.32 q1[1], r1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: lsll r2, r1, r0
|
||||
; CHECK-NEXT: vmov.32 q1[2], r2
|
||||
; CHECK-NEXT: vmov.32 q1[3], r1
|
||||
; CHECK-NEXT: vmov q0, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%i = insertelement <2 x i64> undef, i64 %src2, i32 0
|
||||
|
@ -403,15 +422,18 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @shru_qr_int64_t(<2 x i64> %src1, i64 %src2) {
|
||||
; CHECK-LABEL: shru_qr_int64_t:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: rsb.w r12, r0, #0
|
||||
; CHECK-NEXT: rsbs r0, r0, #0
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: lsll r2, r1, r12
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: lsll r0, r3, r12
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, r2
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r1
|
||||
; CHECK-NEXT: lsll r2, r1, r0
|
||||
; CHECK-NEXT: vmov.32 q1[0], r2
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov.32 q1[1], r1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: lsll r2, r1, r0
|
||||
; CHECK-NEXT: vmov.32 q1[2], r2
|
||||
; CHECK-NEXT: vmov.32 q1[3], r1
|
||||
; CHECK-NEXT: vmov q0, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%i = insertelement <2 x i64> undef, i64 %src2, i32 0
|
||||
|
@ -464,13 +486,16 @@ define arm_aapcs_vfpcc <2 x i64> @shrs_qr_int64_t(<2 x i64> %src1, i64 %src2) {
|
|||
; CHECK-LABEL: shrs_qr_int64_t:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r12, s0
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: asrl r12, r1, r0
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: asrl r2, r1, r0
|
||||
; CHECK-NEXT: vmov.32 q1[0], r2
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: asrl r2, r3, r0
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, r12
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r1
|
||||
; CHECK-NEXT: vmov.32 q1[1], r1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: asrl r2, r1, r0
|
||||
; CHECK-NEXT: vmov.32 q1[2], r2
|
||||
; CHECK-NEXT: vmov.32 q1[3], r1
|
||||
; CHECK-NEXT: vmov q0, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%i = insertelement <2 x i64> undef, i64 %src2, i32 0
|
||||
|
|
|
@ -37,20 +37,22 @@ define arm_aapcs_vfpcc <2 x i64> @add_int64_t(<2 x i64> %src1, <2 x i64> %src2)
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: vmov r2, s4
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: adds.w lr, r3, r2
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: adc.w r12, r1, r0
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov r1, s7
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adcs r1, r3
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, lr
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r12
|
||||
; CHECK-NEXT: vmov.32 q0[0], r0
|
||||
; CHECK-NEXT: vmov.32 q0[1], r1
|
||||
; CHECK-NEXT: vmov.32 q0[2], lr
|
||||
; CHECK-NEXT: vmov.32 q0[3], r12
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
entry:
|
||||
%0 = add nsw <2 x i64> %src1, %src2
|
||||
|
@ -186,20 +188,22 @@ define arm_aapcs_vfpcc <2 x i64> @sub_int64_t(<2 x i64> %src1, <2 x i64> %src2)
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r3, s4
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov r1, s7
|
||||
; CHECK-NEXT: subs.w lr, r3, r2
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: vmov r3, s7
|
||||
; CHECK-NEXT: vmov r2, s4
|
||||
; CHECK-NEXT: vmov r3, s5
|
||||
; CHECK-NEXT: sbc.w r12, r1, r0
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: subs r0, r2, r0
|
||||
; CHECK-NEXT: sbc.w r1, r3, r1
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, lr
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r12
|
||||
; CHECK-NEXT: vmov.32 q0[0], r0
|
||||
; CHECK-NEXT: vmov.32 q0[1], r1
|
||||
; CHECK-NEXT: vmov.32 q0[2], lr
|
||||
; CHECK-NEXT: vmov.32 q0[3], r12
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
entry:
|
||||
%0 = sub nsw <2 x i64> %src2, %src1
|
||||
|
@ -348,9 +352,11 @@ define arm_aapcs_vfpcc <2 x i64> @mul_int64_t(<2 x i64> %src1, <2 x i64> %src2)
|
|||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: mla r0, r2, r0, lr
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r4, r12
|
||||
; CHECK-NEXT: vmov.32 q0[0], r12
|
||||
; CHECK-NEXT: vmov.32 q0[1], r0
|
||||
; CHECK-NEXT: vmov.32 q0[2], r4
|
||||
; CHECK-NEXT: mla r1, r2, r3, r1
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q0[3], r1
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
entry:
|
||||
%0 = mul nsw <2 x i64> %src1, %src2
|
||||
|
|
|
@ -59,20 +59,22 @@ define <2 x i64> @vector_add_i64(<2 x i64> %lhs, <2 x i64> %rhs) {
|
|||
; CHECK-FP-NEXT: vmov d0, r0, r1
|
||||
; CHECK-FP-NEXT: add r0, sp, #8
|
||||
; CHECK-FP-NEXT: vldrw.u32 q1, [r0]
|
||||
; CHECK-FP-NEXT: vmov r1, s0
|
||||
; CHECK-FP-NEXT: vmov r3, s4
|
||||
; CHECK-FP-NEXT: vmov r0, s1
|
||||
; CHECK-FP-NEXT: vmov r2, s5
|
||||
; CHECK-FP-NEXT: vmov r1, s2
|
||||
; CHECK-FP-NEXT: vmov r0, s3
|
||||
; CHECK-FP-NEXT: vmov r3, s6
|
||||
; CHECK-FP-NEXT: vmov r2, s7
|
||||
; CHECK-FP-NEXT: adds.w lr, r1, r3
|
||||
; CHECK-FP-NEXT: vmov r3, s2
|
||||
; CHECK-FP-NEXT: vmov r1, s6
|
||||
; CHECK-FP-NEXT: vmov r3, s0
|
||||
; CHECK-FP-NEXT: vmov r1, s4
|
||||
; CHECK-FP-NEXT: adc.w r12, r0, r2
|
||||
; CHECK-FP-NEXT: vmov r2, s3
|
||||
; CHECK-FP-NEXT: vmov r0, s7
|
||||
; CHECK-FP-NEXT: vmov r2, s1
|
||||
; CHECK-FP-NEXT: vmov r0, s5
|
||||
; CHECK-FP-NEXT: adds r1, r1, r3
|
||||
; CHECK-FP-NEXT: vmov q0[2], q0[0], r1, lr
|
||||
; CHECK-FP-NEXT: vmov.32 q0[0], r1
|
||||
; CHECK-FP-NEXT: adcs r0, r2
|
||||
; CHECK-FP-NEXT: vmov q0[3], q0[1], r0, r12
|
||||
; CHECK-FP-NEXT: vmov.32 q0[1], r0
|
||||
; CHECK-FP-NEXT: vmov.32 q0[2], lr
|
||||
; CHECK-FP-NEXT: vmov.32 q0[3], r12
|
||||
; CHECK-FP-NEXT: vmov r0, r1, d0
|
||||
; CHECK-FP-NEXT: vmov r2, r3, d1
|
||||
; CHECK-FP-NEXT: pop {r7, pc}
|
||||
|
|
|
@ -124,19 +124,22 @@ define arm_aapcs_vfpcc <8 x i16> @vabd_s16(<8 x i16> %src1, <8 x i16> %src2) {
|
|||
; CHECK-LABEL: vabd_s16:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[0]
|
||||
; CHECK-NEXT: vmov.u16 r1, q1[2]
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q2[0], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[1]
|
||||
; CHECK-NEXT: vmov.u16 r1, q1[3]
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q2[1], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[2]
|
||||
; CHECK-NEXT: vmov.32 q2[2], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[3]
|
||||
; CHECK-NEXT: vmov.32 q2[3], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[0]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[2]
|
||||
; CHECK-NEXT: vmovlb.s16 q2, q2
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q3[0], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[1]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[3]
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r1, r0
|
||||
; CHECK-NEXT: vmov.u16 r1, q1[6]
|
||||
; CHECK-NEXT: vmov.32 q3[1], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[2]
|
||||
; CHECK-NEXT: vmov.32 q3[2], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[3]
|
||||
; CHECK-NEXT: vmov.32 q3[3], r0
|
||||
; CHECK-NEXT: vmovlb.s16 q2, q2
|
||||
; CHECK-NEXT: vmovlb.s16 q3, q3
|
||||
; CHECK-NEXT: vsub.i32 q2, q3, q2
|
||||
; CHECK-NEXT: vabs.s32 q3, q2
|
||||
|
@ -149,17 +152,22 @@ define arm_aapcs_vfpcc <8 x i16> @vabd_s16(<8 x i16> %src1, <8 x i16> %src2) {
|
|||
; CHECK-NEXT: vmov r0, s15
|
||||
; CHECK-NEXT: vmov.16 q2[3], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[4]
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q3[0], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[5]
|
||||
; CHECK-NEXT: vmov.u16 r1, q1[7]
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q3[1], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[6]
|
||||
; CHECK-NEXT: vmov.32 q3[2], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[7]
|
||||
; CHECK-NEXT: vmov.32 q3[3], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[4]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[6]
|
||||
; CHECK-NEXT: vmovlb.s16 q1, q3
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q3[0], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[5]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[7]
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q3[1], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[6]
|
||||
; CHECK-NEXT: vmov.32 q3[2], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[7]
|
||||
; CHECK-NEXT: vmov.32 q3[3], r0
|
||||
; CHECK-NEXT: vmovlb.s16 q0, q3
|
||||
; CHECK-NEXT: vsub.i32 q0, q0, q1
|
||||
; CHECK-NEXT: vabs.s32 q0, q0
|
||||
|
@ -186,47 +194,47 @@ define arm_aapcs_vfpcc <8 x i16> @vabd_s16(<8 x i16> %src1, <8 x i16> %src2) {
|
|||
define arm_aapcs_vfpcc <4 x i32> @vabd_s32(<4 x i32> %src1, <4 x i32> %src2) {
|
||||
; CHECK-LABEL: vabd_s32:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vmov.f32 s8, s0
|
||||
; CHECK-NEXT: vmov.f32 s12, s4
|
||||
; CHECK-NEXT: vmov.f32 s10, s1
|
||||
; CHECK-NEXT: vmov r0, s8
|
||||
; CHECK-NEXT: vmov.f32 s14, s5
|
||||
; CHECK-NEXT: vmov r2, s12
|
||||
; CHECK-NEXT: vmov.f32 s16, s2
|
||||
; CHECK-NEXT: vmov.f32 s20, s6
|
||||
; CHECK-NEXT: vmov.f32 s18, s3
|
||||
; CHECK-NEXT: vmov.f32 s22, s7
|
||||
; CHECK-NEXT: vmov r3, s20
|
||||
; CHECK-NEXT: asrs r1, r0, #31
|
||||
; CHECK-NEXT: subs r0, r0, r2
|
||||
; CHECK-NEXT: sbc.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: add.w r0, r0, r1, asr #31
|
||||
; CHECK-NEXT: eor.w r0, r0, r1, asr #31
|
||||
; CHECK-NEXT: vmov r1, s16
|
||||
; CHECK-NEXT: asrs r2, r1, #31
|
||||
; CHECK-NEXT: subs r1, r1, r3
|
||||
; CHECK-NEXT: sbc.w r2, r2, r3, asr #31
|
||||
; CHECK-NEXT: vmov r3, s22
|
||||
; CHECK-NEXT: add.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: eor.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: vmov r2, s14
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
|
||||
; CHECK-NEXT: vmov r3, s12
|
||||
; CHECK-NEXT: vmov r0, s10
|
||||
; CHECK-NEXT: vmov r2, s14
|
||||
; CHECK-NEXT: vmov.f32 s12, s2
|
||||
; CHECK-NEXT: vmov.f32 s14, s3
|
||||
; CHECK-NEXT: vmov.f32 s0, s6
|
||||
; CHECK-NEXT: vmov.f32 s2, s7
|
||||
; CHECK-NEXT: asrs r1, r0, #31
|
||||
; CHECK-NEXT: subs r0, r0, r2
|
||||
; CHECK-NEXT: sbc.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: add.w r0, r0, r1, asr #31
|
||||
; CHECK-NEXT: eor.w r0, r0, r1, asr #31
|
||||
; CHECK-NEXT: vmov r1, s18
|
||||
; CHECK-NEXT: vmov r1, s8
|
||||
; CHECK-NEXT: asrs r2, r1, #31
|
||||
; CHECK-NEXT: subs r1, r1, r3
|
||||
; CHECK-NEXT: sbc.w r2, r2, r3, asr #31
|
||||
; CHECK-NEXT: add.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: eor.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov.32 q2[0], r1
|
||||
; CHECK-NEXT: vmov.32 q2[1], r0
|
||||
; CHECK-NEXT: vmov r0, s12
|
||||
; CHECK-NEXT: asrs r1, r0, #31
|
||||
; CHECK-NEXT: subs r0, r0, r2
|
||||
; CHECK-NEXT: sbc.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: add.w r0, r0, r1, asr #31
|
||||
; CHECK-NEXT: eor.w r0, r0, r1, asr #31
|
||||
; CHECK-NEXT: vmov.32 q2[2], r0
|
||||
; CHECK-NEXT: vmov r0, s14
|
||||
; CHECK-NEXT: asrs r1, r0, #31
|
||||
; CHECK-NEXT: subs r0, r0, r2
|
||||
; CHECK-NEXT: sbc.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: add.w r0, r0, r1, asr #31
|
||||
; CHECK-NEXT: eor.w r0, r0, r1, asr #31
|
||||
; CHECK-NEXT: vmov.32 q2[3], r0
|
||||
; CHECK-NEXT: vmov q0, q2
|
||||
; CHECK-NEXT: bx lr
|
||||
%sextsrc1 = sext <4 x i32> %src1 to <4 x i64>
|
||||
%sextsrc2 = sext <4 x i32> %src2 to <4 x i64>
|
||||
|
@ -361,19 +369,22 @@ define arm_aapcs_vfpcc <8 x i16> @vabd_u16(<8 x i16> %src1, <8 x i16> %src2) {
|
|||
; CHECK-LABEL: vabd_u16:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[0]
|
||||
; CHECK-NEXT: vmov.u16 r1, q1[2]
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q2[0], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[1]
|
||||
; CHECK-NEXT: vmov.u16 r1, q1[3]
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q2[1], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[2]
|
||||
; CHECK-NEXT: vmov.32 q2[2], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[3]
|
||||
; CHECK-NEXT: vmov.32 q2[3], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[0]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[2]
|
||||
; CHECK-NEXT: vmovlb.u16 q2, q2
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q3[0], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[1]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[3]
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r1, r0
|
||||
; CHECK-NEXT: vmov.u16 r1, q1[6]
|
||||
; CHECK-NEXT: vmov.32 q3[1], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[2]
|
||||
; CHECK-NEXT: vmov.32 q3[2], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[3]
|
||||
; CHECK-NEXT: vmov.32 q3[3], r0
|
||||
; CHECK-NEXT: vmovlb.u16 q2, q2
|
||||
; CHECK-NEXT: vmovlb.u16 q3, q3
|
||||
; CHECK-NEXT: vsub.i32 q2, q3, q2
|
||||
; CHECK-NEXT: vabs.s32 q3, q2
|
||||
|
@ -386,17 +397,22 @@ define arm_aapcs_vfpcc <8 x i16> @vabd_u16(<8 x i16> %src1, <8 x i16> %src2) {
|
|||
; CHECK-NEXT: vmov r0, s15
|
||||
; CHECK-NEXT: vmov.16 q2[3], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[4]
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q3[0], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[5]
|
||||
; CHECK-NEXT: vmov.u16 r1, q1[7]
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q3[1], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[6]
|
||||
; CHECK-NEXT: vmov.32 q3[2], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[7]
|
||||
; CHECK-NEXT: vmov.32 q3[3], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[4]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[6]
|
||||
; CHECK-NEXT: vmovlb.u16 q1, q3
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q3[0], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[5]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[7]
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q3[1], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[6]
|
||||
; CHECK-NEXT: vmov.32 q3[2], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[7]
|
||||
; CHECK-NEXT: vmov.32 q3[3], r0
|
||||
; CHECK-NEXT: vmovlb.u16 q0, q3
|
||||
; CHECK-NEXT: vsub.i32 q0, q0, q1
|
||||
; CHECK-NEXT: vabs.s32 q0, q0
|
||||
|
@ -423,56 +439,59 @@ define arm_aapcs_vfpcc <8 x i16> @vabd_u16(<8 x i16> %src1, <8 x i16> %src2) {
|
|||
define arm_aapcs_vfpcc <4 x i32> @vabd_u32(<4 x i32> %src1, <4 x i32> %src2) {
|
||||
; CHECK-LABEL: vabd_u32:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vmov.f32 s8, s4
|
||||
; CHECK-NEXT: vmov.i64 q4, #0xffffffff
|
||||
; CHECK-NEXT: vmov.f32 s12, s0
|
||||
; CHECK-NEXT: vmov.i64 q3, #0xffffffff
|
||||
; CHECK-NEXT: vmov.f32 s16, s0
|
||||
; CHECK-NEXT: vmov.f32 s10, s5
|
||||
; CHECK-NEXT: vmov.f32 s14, s1
|
||||
; CHECK-NEXT: vand q2, q2, q4
|
||||
; CHECK-NEXT: vand q3, q3, q4
|
||||
; CHECK-NEXT: vmov r2, s8
|
||||
; CHECK-NEXT: vmov r3, s12
|
||||
; CHECK-NEXT: vmov r0, s9
|
||||
; CHECK-NEXT: vmov r1, s13
|
||||
; CHECK-NEXT: vmov.f32 s20, s6
|
||||
; CHECK-NEXT: vmov.f32 s22, s7
|
||||
; CHECK-NEXT: vand q1, q5, q4
|
||||
; CHECK-NEXT: vmov.f32 s20, s2
|
||||
; CHECK-NEXT: vmov.f32 s22, s3
|
||||
; CHECK-NEXT: vand q4, q5, q4
|
||||
; CHECK-NEXT: vmov.f32 s18, s1
|
||||
; CHECK-NEXT: vand q2, q2, q3
|
||||
; CHECK-NEXT: vand q4, q4, q3
|
||||
; CHECK-NEXT: vmov r2, s10
|
||||
; CHECK-NEXT: vmov r3, s18
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: vmov r1, s19
|
||||
; CHECK-NEXT: subs r2, r3, r2
|
||||
; CHECK-NEXT: vmov r3, s4
|
||||
; CHECK-NEXT: vmov r3, s8
|
||||
; CHECK-NEXT: sbc.w r0, r1, r0
|
||||
; CHECK-NEXT: add.w r1, r2, r0, asr #31
|
||||
; CHECK-NEXT: vmov r2, s17
|
||||
; CHECK-NEXT: eor.w r12, r1, r0, asr #31
|
||||
; CHECK-NEXT: vmov r0, s16
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: vmov r1, s9
|
||||
; CHECK-NEXT: vmov.f32 s16, s6
|
||||
; CHECK-NEXT: vmov.f32 s18, s7
|
||||
; CHECK-NEXT: vand q1, q4, q3
|
||||
; CHECK-NEXT: vmov.f32 s16, s2
|
||||
; CHECK-NEXT: vmov.f32 s18, s3
|
||||
; CHECK-NEXT: vand q0, q4, q3
|
||||
; CHECK-NEXT: subs r0, r0, r3
|
||||
; CHECK-NEXT: vmov r3, s14
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: sbc.w r1, r2, r1
|
||||
; CHECK-NEXT: vmov r2, s10
|
||||
; CHECK-NEXT: vmov r2, s4
|
||||
; CHECK-NEXT: add.w r0, r0, r1, asr #31
|
||||
; CHECK-NEXT: eor.w r0, r0, r1, asr #31
|
||||
; CHECK-NEXT: vmov r1, s15
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, r12
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov.32 q2[0], r0
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov.32 q2[1], r12
|
||||
; CHECK-NEXT: subs r2, r3, r2
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: sbc.w r0, r1, r0
|
||||
; CHECK-NEXT: add.w r1, r2, r0, asr #31
|
||||
; CHECK-NEXT: vmov r2, s19
|
||||
; CHECK-NEXT: eor.w r12, r1, r0, asr #31
|
||||
; CHECK-NEXT: vmov r0, s18
|
||||
; CHECK-NEXT: vmov r1, s7
|
||||
; CHECK-NEXT: subs r0, r0, r3
|
||||
; CHECK-NEXT: sbc.w r1, r2, r1
|
||||
; CHECK-NEXT: add.w r0, r0, r1, asr #31
|
||||
; CHECK-NEXT: eor.w r0, r0, r1, asr #31
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r0, r12
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: eor.w r0, r1, r0, asr #31
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov.32 q2[2], r0
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: subs r2, r3, r2
|
||||
; CHECK-NEXT: sbc.w r0, r1, r0
|
||||
; CHECK-NEXT: add.w r1, r2, r0, asr #31
|
||||
; CHECK-NEXT: eor.w r0, r1, r0, asr #31
|
||||
; CHECK-NEXT: vmov.32 q2[3], r0
|
||||
; CHECK-NEXT: vmov q0, q2
|
||||
; CHECK-NEXT: vpop {d8, d9}
|
||||
; CHECK-NEXT: bx lr
|
||||
%zextsrc1 = zext <4 x i32> %src1 to <4 x i64>
|
||||
%zextsrc2 = zext <4 x i32> %src2 to <4 x i64>
|
||||
|
@ -599,8 +618,10 @@ for.cond.cleanup: ; preds = %vector.body
|
|||
define void @vabd_loop_s32(i32* nocapture readonly %x, i32* nocapture readonly %y, i32* noalias nocapture %z, i32 %n) {
|
||||
; CHECK-LABEL: vabd_loop_s32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
|
||||
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr}
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr}
|
||||
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
|
||||
; CHECK-NEXT: .pad #4
|
||||
; CHECK-NEXT: sub sp, #4
|
||||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: mov.w lr, #256
|
||||
|
@ -632,31 +653,34 @@ define void @vabd_loop_s32(i32* nocapture readonly %x, i32* nocapture readonly %
|
|||
; CHECK-NEXT: vmov r3, s4
|
||||
; CHECK-NEXT: subs.w r9, r5, r7
|
||||
; CHECK-NEXT: asr.w r6, r5, #31
|
||||
; CHECK-NEXT: vmov r5, s6
|
||||
; CHECK-NEXT: sbc.w r6, r6, r7, asr #31
|
||||
; CHECK-NEXT: vmov.32 q1[0], r8
|
||||
; CHECK-NEXT: vmov.32 q1[1], r9
|
||||
; CHECK-NEXT: and.w r6, r12, r6, asr #31
|
||||
; CHECK-NEXT: rsbs r6, r6, #0
|
||||
; CHECK-NEXT: bfi r4, r6, #4, #4
|
||||
; CHECK-NEXT: vmov r6, s12
|
||||
; CHECK-NEXT: subs.w r10, r6, r3
|
||||
; CHECK-NEXT: asr.w r7, r6, #31
|
||||
; CHECK-NEXT: asrs r7, r6, #31
|
||||
; CHECK-NEXT: subs r6, r6, r3
|
||||
; CHECK-NEXT: sbc.w r3, r7, r3, asr #31
|
||||
; CHECK-NEXT: vmov r7, s14
|
||||
; CHECK-NEXT: vmov r6, s6
|
||||
; CHECK-NEXT: vmov.32 q1[2], r6
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
; CHECK-NEXT: asrs r5, r7, #31
|
||||
; CHECK-NEXT: subs r7, r7, r6
|
||||
; CHECK-NEXT: sbc.w r5, r5, r6, asr #31
|
||||
; CHECK-NEXT: asrs r6, r5, #31
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r6, r3
|
||||
; CHECK-NEXT: vmov r3, s4
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r10, r8
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r7, r9
|
||||
; CHECK-NEXT: and r3, r3, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r4, r3, #8, #4
|
||||
; CHECK-NEXT: and.w r3, r12, r5, asr #31
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: bfi r4, r3, #12, #4
|
||||
; CHECK-NEXT: vmov.32 q2[0], r3
|
||||
; CHECK-NEXT: vmov r3, s14
|
||||
; CHECK-NEXT: asrs r7, r3, #31
|
||||
; CHECK-NEXT: subs r3, r3, r5
|
||||
; CHECK-NEXT: sbc.w r5, r7, r5, asr #31
|
||||
; CHECK-NEXT: vmov.32 q1[3], r3
|
||||
; CHECK-NEXT: asrs r7, r5, #31
|
||||
; CHECK-NEXT: and.w r5, r12, r5, asr #31
|
||||
; CHECK-NEXT: vmov.32 q2[2], r7
|
||||
; CHECK-NEXT: rsbs r5, r5, #0
|
||||
; CHECK-NEXT: vmov r7, s8
|
||||
; CHECK-NEXT: and r7, r7, #1
|
||||
; CHECK-NEXT: rsbs r7, r7, #0
|
||||
; CHECK-NEXT: bfi r4, r7, #8, #4
|
||||
; CHECK-NEXT: bfi r4, r5, #12, #4
|
||||
; CHECK-NEXT: vmsr p0, r4
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vsubt.i32 q1, q0, q1
|
||||
|
@ -664,7 +688,8 @@ define void @vabd_loop_s32(i32* nocapture readonly %x, i32* nocapture readonly %
|
|||
; CHECK-NEXT: le lr, .LBB8_1
|
||||
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
|
||||
; CHECK-NEXT: vpop {d8, d9}
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc}
|
||||
; CHECK-NEXT: add sp, #4
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
|
||||
entry:
|
||||
br label %vector.body
|
||||
|
||||
|
@ -809,10 +834,8 @@ for.cond.cleanup: ; preds = %vector.body
|
|||
define void @vabd_loop_u32(i32* nocapture readonly %x, i32* nocapture readonly %y, i32* noalias nocapture %z, i32 %n) {
|
||||
; CHECK-LABEL: vabd_loop_u32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
|
||||
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
|
||||
; CHECK-NEXT: .pad #4
|
||||
; CHECK-NEXT: sub sp, #4
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
|
||||
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: mov.w lr, #256
|
||||
|
@ -859,25 +882,28 @@ define void @vabd_loop_u32(i32* nocapture readonly %x, i32* nocapture readonly %
|
|||
; CHECK-NEXT: bfi r4, r3, #4, #4
|
||||
; CHECK-NEXT: vmov r3, s9
|
||||
; CHECK-NEXT: subs.w r10, r5, r7
|
||||
; CHECK-NEXT: vmov r5, s10
|
||||
; CHECK-NEXT: vmov r7, s15
|
||||
; CHECK-NEXT: vmov r7, s10
|
||||
; CHECK-NEXT: vmov r5, s14
|
||||
; CHECK-NEXT: sbc.w r3, r6, r3
|
||||
; CHECK-NEXT: vmov r6, s11
|
||||
; CHECK-NEXT: asr.w r11, r3, #31
|
||||
; CHECK-NEXT: vmov r3, s14
|
||||
; CHECK-NEXT: subs r3, r3, r5
|
||||
; CHECK-NEXT: sbc.w r5, r7, r6
|
||||
; CHECK-NEXT: asrs r6, r5, #31
|
||||
; CHECK-NEXT: and.w r5, r12, r5, asr #31
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r6, r11
|
||||
; CHECK-NEXT: rsbs r5, r5, #0
|
||||
; CHECK-NEXT: vmov r6, s8
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r10, r8
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r3, r9
|
||||
; CHECK-NEXT: vmov r6, s15
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
; CHECK-NEXT: vmov.32 q4[0], r3
|
||||
; CHECK-NEXT: vmov r3, s11
|
||||
; CHECK-NEXT: vmov.32 q2[0], r8
|
||||
; CHECK-NEXT: vmov.32 q2[1], r9
|
||||
; CHECK-NEXT: vmov.32 q2[2], r10
|
||||
; CHECK-NEXT: subs r5, r5, r7
|
||||
; CHECK-NEXT: vmov.32 q2[3], r5
|
||||
; CHECK-NEXT: sbc.w r3, r6, r3
|
||||
; CHECK-NEXT: asrs r6, r3, #31
|
||||
; CHECK-NEXT: and.w r3, r12, r3, asr #31
|
||||
; CHECK-NEXT: vmov.32 q4[2], r6
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: vmov r6, s16
|
||||
; CHECK-NEXT: and r6, r6, #1
|
||||
; CHECK-NEXT: rsbs r6, r6, #0
|
||||
; CHECK-NEXT: bfi r4, r6, #8, #4
|
||||
; CHECK-NEXT: bfi r4, r5, #12, #4
|
||||
; CHECK-NEXT: bfi r4, r3, #12, #4
|
||||
; CHECK-NEXT: vmsr p0, r4
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vsubt.i32 q2, q1, q2
|
||||
|
@ -885,8 +911,7 @@ define void @vabd_loop_u32(i32* nocapture readonly %x, i32* nocapture readonly %
|
|||
; CHECK-NEXT: le lr, .LBB11_1
|
||||
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: add sp, #4
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc}
|
||||
entry:
|
||||
br label %vector.body
|
||||
|
||||
|
|
|
@ -367,31 +367,36 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @vcmp_eq_v2i64(<2 x i64> %src, <2 x i64> %srcb, <2 x i64> %a, <2 x i64> %b) {
|
||||
; CHECK-LABEL: vcmp_eq_v2i64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: eors r0, r1
|
||||
; CHECK-NEXT: vmov r1, s4
|
||||
; CHECK-NEXT: eors r1, r2
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s7
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q4[0], r0
|
||||
; CHECK-NEXT: vmov.32 q4[1], r0
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: eors r0, r1
|
||||
; CHECK-NEXT: vmov r1, s6
|
||||
; CHECK-NEXT: eors r1, r2
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: eors r2, r3
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: cset r1, eq
|
||||
; CHECK-NEXT: tst.w r1, #1
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
|
||||
; CHECK-NEXT: vbic q1, q3, q0
|
||||
; CHECK-NEXT: vand q0, q2, q0
|
||||
; CHECK-NEXT: vorr q0, q0, q1
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q4[2], r0
|
||||
; CHECK-NEXT: vmov.32 q4[3], r0
|
||||
; CHECK-NEXT: vbic q0, q3, q4
|
||||
; CHECK-NEXT: vand q1, q2, q4
|
||||
; CHECK-NEXT: vorr q0, q1, q0
|
||||
; CHECK-NEXT: vpop {d8, d9}
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%c = icmp eq <2 x i64> %src, %srcb
|
||||
|
@ -402,31 +407,36 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i32> @vcmp_eq_v2i32(<2 x i64> %src, <2 x i64> %srcb, <2 x i32> %a, <2 x i32> %b) {
|
||||
; CHECK-LABEL: vcmp_eq_v2i32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: eors r0, r1
|
||||
; CHECK-NEXT: vmov r1, s4
|
||||
; CHECK-NEXT: eors r1, r2
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s7
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q4[0], r0
|
||||
; CHECK-NEXT: vmov.32 q4[1], r0
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: eors r0, r1
|
||||
; CHECK-NEXT: vmov r1, s6
|
||||
; CHECK-NEXT: eors r1, r2
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: eors r2, r3
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: cset r1, eq
|
||||
; CHECK-NEXT: tst.w r1, #1
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
|
||||
; CHECK-NEXT: vbic q1, q3, q0
|
||||
; CHECK-NEXT: vand q0, q2, q0
|
||||
; CHECK-NEXT: vorr q0, q0, q1
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q4[2], r0
|
||||
; CHECK-NEXT: vmov.32 q4[3], r0
|
||||
; CHECK-NEXT: vbic q0, q3, q4
|
||||
; CHECK-NEXT: vand q1, q2, q4
|
||||
; CHECK-NEXT: vorr q0, q1, q0
|
||||
; CHECK-NEXT: vpop {d8, d9}
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%c = icmp eq <2 x i64> %src, %srcb
|
||||
|
@ -437,76 +447,84 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i32> @vcmp_multi_v2i32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
|
||||
; CHECK-LABEL: vcmp_multi_v2i32:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: .save {r4, lr}
|
||||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: movs r3, #0
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r2, s8
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: vmov r2, s8
|
||||
; CHECK-NEXT: cset r1, eq
|
||||
; CHECK-NEXT: tst.w r1, #1
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
|
||||
; CHECK-NEXT: vbic q0, q2, q0
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: subs r1, r0, r2
|
||||
; CHECK-NEXT: asr.w r12, r0, #31
|
||||
; CHECK-NEXT: vmov.32 q3[0], r0
|
||||
; CHECK-NEXT: vmov.32 q3[1], r0
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q3[2], r0
|
||||
; CHECK-NEXT: vmov.32 q3[3], r0
|
||||
; CHECK-NEXT: vbic q0, q2, q3
|
||||
; CHECK-NEXT: vmov lr, s0
|
||||
; CHECK-NEXT: subs.w r1, lr, r2
|
||||
; CHECK-NEXT: asr.w r12, lr, #31
|
||||
; CHECK-NEXT: sbcs.w r1, r12, r2, asr #31
|
||||
; CHECK-NEXT: mov.w r1, #0
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r2, s10
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r1, #1
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: vmov r1, s10
|
||||
; CHECK-NEXT: csetm lr, ne
|
||||
; CHECK-NEXT: asr.w r12, r2, #31
|
||||
; CHECK-NEXT: subs r4, r2, r1
|
||||
; CHECK-NEXT: sbcs.w r1, r12, r1, asr #31
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov.32 q3[0], r1
|
||||
; CHECK-NEXT: vmov.32 q3[1], r1
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: subs r0, r1, r2
|
||||
; CHECK-NEXT: asr.w r12, r1, #31
|
||||
; CHECK-NEXT: sbcs.w r0, r12, r2, asr #31
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r3, #1
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: cset r0, ne
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r1, lr
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r1, lr
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: cset r1, ne
|
||||
; CHECK-NEXT: tst.w r1, #1
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q4[2], q4[0], r1, r0
|
||||
; CHECK-NEXT: vmov q4[3], q4[1], r1, r0
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r1, s6
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: cmp.w lr, #0
|
||||
; CHECK-NEXT: vmov.32 q3[2], r0
|
||||
; CHECK-NEXT: vmov.32 q3[3], r0
|
||||
; CHECK-NEXT: cset r0, ne
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: cset r1, ne
|
||||
; CHECK-NEXT: tst.w r1, #1
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
|
||||
; CHECK-NEXT: vand q1, q1, q4
|
||||
; CHECK-NEXT: vmov.32 q4[0], r0
|
||||
; CHECK-NEXT: vmov.32 q4[1], r0
|
||||
; CHECK-NEXT: cset r0, ne
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q4[2], r0
|
||||
; CHECK-NEXT: vmov.32 q4[3], r0
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: cset r0, ne
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q5[0], r0
|
||||
; CHECK-NEXT: vmov.32 q5[1], r0
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: cset r0, ne
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q5[2], r0
|
||||
; CHECK-NEXT: vmov.32 q5[3], r0
|
||||
; CHECK-NEXT: vand q1, q5, q4
|
||||
; CHECK-NEXT: vand q1, q3, q1
|
||||
; CHECK-NEXT: vbic q0, q0, q1
|
||||
; CHECK-NEXT: vand q1, q2, q1
|
||||
; CHECK-NEXT: vorr q0, q1, q0
|
||||
; CHECK-NEXT: vpop {d8, d9}
|
||||
; CHECK-NEXT: pop {r4, pc}
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
%a4 = icmp eq <2 x i64> %a, zeroinitializer
|
||||
%a5 = select <2 x i1> %a4, <2 x i32> zeroinitializer, <2 x i32> %c
|
||||
%a6 = icmp ne <2 x i32> %b, zeroinitializer
|
||||
|
|
|
@ -438,22 +438,24 @@ define arm_aapcs_vfpcc <2 x i64> @vcmp_eq_v2i64(<2 x i64> %src, i64 %src2, <2 x
|
|||
; CHECK-NEXT: eors r2, r1
|
||||
; CHECK-NEXT: eors r3, r0
|
||||
; CHECK-NEXT: orrs r2, r3
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: cset r2, eq
|
||||
; CHECK-NEXT: tst.w r2, #1
|
||||
; CHECK-NEXT: csetm r2, ne
|
||||
; CHECK-NEXT: eors r1, r3
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: eors r0, r3
|
||||
; CHECK-NEXT: vmov.32 q3[0], r2
|
||||
; CHECK-NEXT: vmov.32 q3[1], r2
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: eors r1, r2
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: eors r0, r2
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, r2
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r0, r2
|
||||
; CHECK-NEXT: vbic q2, q2, q0
|
||||
; CHECK-NEXT: vand q0, q1, q0
|
||||
; CHECK-NEXT: vorr q0, q0, q2
|
||||
; CHECK-NEXT: vmov.32 q3[2], r0
|
||||
; CHECK-NEXT: vmov.32 q3[3], r0
|
||||
; CHECK-NEXT: vbic q0, q2, q3
|
||||
; CHECK-NEXT: vand q1, q1, q3
|
||||
; CHECK-NEXT: vorr q0, q1, q0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%i = insertelement <2 x i64> undef, i64 %src2, i32 0
|
||||
|
@ -471,22 +473,24 @@ define arm_aapcs_vfpcc <2 x i32> @vcmp_eq_v2i32(<2 x i64> %src, i64 %src2, <2 x
|
|||
; CHECK-NEXT: eors r2, r1
|
||||
; CHECK-NEXT: eors r3, r0
|
||||
; CHECK-NEXT: orrs r2, r3
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: cset r2, eq
|
||||
; CHECK-NEXT: tst.w r2, #1
|
||||
; CHECK-NEXT: csetm r2, ne
|
||||
; CHECK-NEXT: eors r1, r3
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: eors r0, r3
|
||||
; CHECK-NEXT: vmov.32 q3[0], r2
|
||||
; CHECK-NEXT: vmov.32 q3[1], r2
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: eors r1, r2
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: eors r0, r2
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, r2
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r0, r2
|
||||
; CHECK-NEXT: vbic q2, q2, q0
|
||||
; CHECK-NEXT: vand q0, q1, q0
|
||||
; CHECK-NEXT: vorr q0, q0, q2
|
||||
; CHECK-NEXT: vmov.32 q3[2], r0
|
||||
; CHECK-NEXT: vmov.32 q3[3], r0
|
||||
; CHECK-NEXT: vbic q0, q2, q3
|
||||
; CHECK-NEXT: vand q1, q1, q3
|
||||
; CHECK-NEXT: vorr q0, q1, q0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%i = insertelement <2 x i64> undef, i64 %src2, i32 0
|
||||
|
@ -499,76 +503,84 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i32> @vcmp_multi_v2i32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
|
||||
; CHECK-LABEL: vcmp_multi_v2i32:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: .save {r4, lr}
|
||||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: movs r3, #0
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r2, s8
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: vmov r2, s8
|
||||
; CHECK-NEXT: cset r1, eq
|
||||
; CHECK-NEXT: tst.w r1, #1
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
|
||||
; CHECK-NEXT: vbic q0, q2, q0
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: subs r1, r0, r2
|
||||
; CHECK-NEXT: asr.w r12, r0, #31
|
||||
; CHECK-NEXT: vmov.32 q3[0], r0
|
||||
; CHECK-NEXT: vmov.32 q3[1], r0
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q3[2], r0
|
||||
; CHECK-NEXT: vmov.32 q3[3], r0
|
||||
; CHECK-NEXT: vbic q0, q2, q3
|
||||
; CHECK-NEXT: vmov lr, s0
|
||||
; CHECK-NEXT: subs.w r1, lr, r2
|
||||
; CHECK-NEXT: asr.w r12, lr, #31
|
||||
; CHECK-NEXT: sbcs.w r1, r12, r2, asr #31
|
||||
; CHECK-NEXT: mov.w r1, #0
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r2, s10
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r1, #1
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: vmov r1, s10
|
||||
; CHECK-NEXT: csetm lr, ne
|
||||
; CHECK-NEXT: asr.w r12, r2, #31
|
||||
; CHECK-NEXT: subs r4, r2, r1
|
||||
; CHECK-NEXT: sbcs.w r1, r12, r1, asr #31
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov.32 q3[0], r1
|
||||
; CHECK-NEXT: vmov.32 q3[1], r1
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: subs r0, r1, r2
|
||||
; CHECK-NEXT: asr.w r12, r1, #31
|
||||
; CHECK-NEXT: sbcs.w r0, r12, r2, asr #31
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r3, #1
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: cset r0, ne
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r1, lr
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r1, lr
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: cset r1, ne
|
||||
; CHECK-NEXT: tst.w r1, #1
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q4[2], q4[0], r1, r0
|
||||
; CHECK-NEXT: vmov q4[3], q4[1], r1, r0
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r1, s6
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: cmp.w lr, #0
|
||||
; CHECK-NEXT: vmov.32 q3[2], r0
|
||||
; CHECK-NEXT: vmov.32 q3[3], r0
|
||||
; CHECK-NEXT: cset r0, ne
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: cset r1, ne
|
||||
; CHECK-NEXT: tst.w r1, #1
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
|
||||
; CHECK-NEXT: vand q1, q1, q4
|
||||
; CHECK-NEXT: vmov.32 q4[0], r0
|
||||
; CHECK-NEXT: vmov.32 q4[1], r0
|
||||
; CHECK-NEXT: cset r0, ne
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q4[2], r0
|
||||
; CHECK-NEXT: vmov.32 q4[3], r0
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: cset r0, ne
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q5[0], r0
|
||||
; CHECK-NEXT: vmov.32 q5[1], r0
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: cset r0, ne
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q5[2], r0
|
||||
; CHECK-NEXT: vmov.32 q5[3], r0
|
||||
; CHECK-NEXT: vand q1, q5, q4
|
||||
; CHECK-NEXT: vand q1, q3, q1
|
||||
; CHECK-NEXT: vbic q0, q0, q1
|
||||
; CHECK-NEXT: vand q1, q2, q1
|
||||
; CHECK-NEXT: vorr q0, q1, q0
|
||||
; CHECK-NEXT: vpop {d8, d9}
|
||||
; CHECK-NEXT: pop {r4, pc}
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
%a4 = icmp eq <2 x i64> %a, zeroinitializer
|
||||
%a5 = select <2 x i1> %a4, <2 x i32> zeroinitializer, <2 x i32> %c
|
||||
%a6 = icmp ne <2 x i32> %b, zeroinitializer
|
||||
|
@ -1019,22 +1031,24 @@ define arm_aapcs_vfpcc <2 x i64> @vcmp_r_eq_v2i64(<2 x i64> %src, i64 %src2, <2
|
|||
; CHECK-NEXT: eors r2, r1
|
||||
; CHECK-NEXT: eors r3, r0
|
||||
; CHECK-NEXT: orrs r2, r3
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: cset r2, eq
|
||||
; CHECK-NEXT: tst.w r2, #1
|
||||
; CHECK-NEXT: csetm r2, ne
|
||||
; CHECK-NEXT: eors r1, r3
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: eors r0, r3
|
||||
; CHECK-NEXT: vmov.32 q3[0], r2
|
||||
; CHECK-NEXT: vmov.32 q3[1], r2
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: eors r1, r2
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: eors r0, r2
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, r2
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r0, r2
|
||||
; CHECK-NEXT: vbic q2, q2, q0
|
||||
; CHECK-NEXT: vand q0, q1, q0
|
||||
; CHECK-NEXT: vorr q0, q0, q2
|
||||
; CHECK-NEXT: vmov.32 q3[2], r0
|
||||
; CHECK-NEXT: vmov.32 q3[3], r0
|
||||
; CHECK-NEXT: vbic q0, q2, q3
|
||||
; CHECK-NEXT: vand q1, q1, q3
|
||||
; CHECK-NEXT: vorr q0, q1, q0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%i = insertelement <2 x i64> undef, i64 %src2, i32 0
|
||||
|
@ -1052,22 +1066,24 @@ define arm_aapcs_vfpcc <2 x i32> @vcmp_r_eq_v2i32(<2 x i64> %src, i64 %src2, <2
|
|||
; CHECK-NEXT: eors r2, r1
|
||||
; CHECK-NEXT: eors r3, r0
|
||||
; CHECK-NEXT: orrs r2, r3
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: cset r2, eq
|
||||
; CHECK-NEXT: tst.w r2, #1
|
||||
; CHECK-NEXT: csetm r2, ne
|
||||
; CHECK-NEXT: eors r1, r3
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: eors r0, r3
|
||||
; CHECK-NEXT: vmov.32 q3[0], r2
|
||||
; CHECK-NEXT: vmov.32 q3[1], r2
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: eors r1, r2
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: eors r0, r2
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, r2
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r0, r2
|
||||
; CHECK-NEXT: vbic q2, q2, q0
|
||||
; CHECK-NEXT: vand q0, q1, q0
|
||||
; CHECK-NEXT: vorr q0, q0, q2
|
||||
; CHECK-NEXT: vmov.32 q3[2], r0
|
||||
; CHECK-NEXT: vmov.32 q3[3], r0
|
||||
; CHECK-NEXT: vbic q0, q2, q3
|
||||
; CHECK-NEXT: vand q1, q1, q3
|
||||
; CHECK-NEXT: vorr q0, q1, q0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%i = insertelement <2 x i64> undef, i64 %src2, i32 0
|
||||
|
@ -1080,76 +1096,84 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i32> @vcmp_r_multi_v2i32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
|
||||
; CHECK-LABEL: vcmp_r_multi_v2i32:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: .save {r4, lr}
|
||||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: movs r3, #0
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r2, s8
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: vmov r2, s8
|
||||
; CHECK-NEXT: cset r1, eq
|
||||
; CHECK-NEXT: tst.w r1, #1
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
|
||||
; CHECK-NEXT: vbic q0, q2, q0
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: subs r1, r0, r2
|
||||
; CHECK-NEXT: asr.w r12, r0, #31
|
||||
; CHECK-NEXT: vmov.32 q3[0], r0
|
||||
; CHECK-NEXT: vmov.32 q3[1], r0
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q3[2], r0
|
||||
; CHECK-NEXT: vmov.32 q3[3], r0
|
||||
; CHECK-NEXT: vbic q0, q2, q3
|
||||
; CHECK-NEXT: vmov lr, s0
|
||||
; CHECK-NEXT: subs.w r1, lr, r2
|
||||
; CHECK-NEXT: asr.w r12, lr, #31
|
||||
; CHECK-NEXT: sbcs.w r1, r12, r2, asr #31
|
||||
; CHECK-NEXT: mov.w r1, #0
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r2, s10
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r1, #1
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: vmov r1, s10
|
||||
; CHECK-NEXT: csetm lr, ne
|
||||
; CHECK-NEXT: asr.w r12, r2, #31
|
||||
; CHECK-NEXT: subs r4, r2, r1
|
||||
; CHECK-NEXT: sbcs.w r1, r12, r1, asr #31
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov.32 q3[0], r1
|
||||
; CHECK-NEXT: vmov.32 q3[1], r1
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: subs r0, r1, r2
|
||||
; CHECK-NEXT: asr.w r12, r1, #31
|
||||
; CHECK-NEXT: sbcs.w r0, r12, r2, asr #31
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r3, #1
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: cset r0, ne
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r1, lr
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r1, lr
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: cset r1, ne
|
||||
; CHECK-NEXT: tst.w r1, #1
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q4[2], q4[0], r1, r0
|
||||
; CHECK-NEXT: vmov q4[3], q4[1], r1, r0
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r1, s6
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: cmp.w lr, #0
|
||||
; CHECK-NEXT: vmov.32 q3[2], r0
|
||||
; CHECK-NEXT: vmov.32 q3[3], r0
|
||||
; CHECK-NEXT: cset r0, ne
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: cset r1, ne
|
||||
; CHECK-NEXT: tst.w r1, #1
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
|
||||
; CHECK-NEXT: vand q1, q1, q4
|
||||
; CHECK-NEXT: vmov.32 q4[0], r0
|
||||
; CHECK-NEXT: vmov.32 q4[1], r0
|
||||
; CHECK-NEXT: cset r0, ne
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q4[2], r0
|
||||
; CHECK-NEXT: vmov.32 q4[3], r0
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: cset r0, ne
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q5[0], r0
|
||||
; CHECK-NEXT: vmov.32 q5[1], r0
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: cset r0, ne
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q5[2], r0
|
||||
; CHECK-NEXT: vmov.32 q5[3], r0
|
||||
; CHECK-NEXT: vand q1, q5, q4
|
||||
; CHECK-NEXT: vand q1, q3, q1
|
||||
; CHECK-NEXT: vbic q0, q0, q1
|
||||
; CHECK-NEXT: vand q1, q2, q1
|
||||
; CHECK-NEXT: vorr q0, q1, q0
|
||||
; CHECK-NEXT: vpop {d8, d9}
|
||||
; CHECK-NEXT: pop {r4, pc}
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
%a4 = icmp eq <2 x i64> %a, zeroinitializer
|
||||
%a5 = select <2 x i1> %a4, <2 x i32> zeroinitializer, <2 x i32> %c
|
||||
%a6 = icmp ne <2 x i32> %b, zeroinitializer
|
||||
|
|
|
@ -363,21 +363,23 @@ define arm_aapcs_vfpcc <2 x i64> @vcmp_eqz_v2i64(<2 x i64> %src, <2 x i64> %a, <
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: cset r1, eq
|
||||
; CHECK-NEXT: tst.w r1, #1
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
|
||||
; CHECK-NEXT: vbic q2, q2, q0
|
||||
; CHECK-NEXT: vand q0, q1, q0
|
||||
; CHECK-NEXT: vorr q0, q0, q2
|
||||
; CHECK-NEXT: vmov.32 q3[0], r0
|
||||
; CHECK-NEXT: vmov.32 q3[1], r0
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q3[2], r0
|
||||
; CHECK-NEXT: vmov.32 q3[3], r0
|
||||
; CHECK-NEXT: vbic q0, q2, q3
|
||||
; CHECK-NEXT: vand q1, q1, q3
|
||||
; CHECK-NEXT: vorr q0, q1, q0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%c = icmp eq <2 x i64> %src, zeroinitializer
|
||||
|
@ -390,21 +392,23 @@ define arm_aapcs_vfpcc <2 x i32> @vcmp_eqz_v2i32(<2 x i64> %src, <2 x i32> %a, <
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: cset r1, eq
|
||||
; CHECK-NEXT: tst.w r1, #1
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
|
||||
; CHECK-NEXT: vbic q2, q2, q0
|
||||
; CHECK-NEXT: vand q0, q1, q0
|
||||
; CHECK-NEXT: vorr q0, q0, q2
|
||||
; CHECK-NEXT: vmov.32 q3[0], r0
|
||||
; CHECK-NEXT: vmov.32 q3[1], r0
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q3[2], r0
|
||||
; CHECK-NEXT: vmov.32 q3[3], r0
|
||||
; CHECK-NEXT: vbic q0, q2, q3
|
||||
; CHECK-NEXT: vand q1, q1, q3
|
||||
; CHECK-NEXT: vorr q0, q1, q0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%c = icmp eq <2 x i64> %src, zeroinitializer
|
||||
|
@ -777,21 +781,23 @@ define arm_aapcs_vfpcc <2 x i64> @vcmp_r_eqz_v2i64(<2 x i64> %src, <2 x i64> %a,
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: cset r1, eq
|
||||
; CHECK-NEXT: tst.w r1, #1
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
|
||||
; CHECK-NEXT: vbic q2, q2, q0
|
||||
; CHECK-NEXT: vand q0, q1, q0
|
||||
; CHECK-NEXT: vorr q0, q0, q2
|
||||
; CHECK-NEXT: vmov.32 q3[0], r0
|
||||
; CHECK-NEXT: vmov.32 q3[1], r0
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q3[2], r0
|
||||
; CHECK-NEXT: vmov.32 q3[3], r0
|
||||
; CHECK-NEXT: vbic q0, q2, q3
|
||||
; CHECK-NEXT: vand q1, q1, q3
|
||||
; CHECK-NEXT: vorr q0, q1, q0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%c = icmp eq <2 x i64> zeroinitializer, %src
|
||||
|
@ -804,21 +810,23 @@ define arm_aapcs_vfpcc <2 x i32> @vcmp_r_eqz_v2i32(<2 x i64> %src, <2 x i32> %a,
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: cset r1, eq
|
||||
; CHECK-NEXT: tst.w r1, #1
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
|
||||
; CHECK-NEXT: vbic q2, q2, q0
|
||||
; CHECK-NEXT: vand q0, q1, q0
|
||||
; CHECK-NEXT: vorr q0, q0, q2
|
||||
; CHECK-NEXT: vmov.32 q3[0], r0
|
||||
; CHECK-NEXT: vmov.32 q3[1], r0
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: cset r0, eq
|
||||
; CHECK-NEXT: tst.w r0, #1
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q3[2], r0
|
||||
; CHECK-NEXT: vmov.32 q3[3], r0
|
||||
; CHECK-NEXT: vbic q0, q2, q3
|
||||
; CHECK-NEXT: vand q1, q1, q3
|
||||
; CHECK-NEXT: vorr q0, q1, q0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%c = icmp eq <2 x i64> %src, zeroinitializer
|
||||
|
|
|
@ -4,8 +4,10 @@
|
|||
define arm_aapcs_vfpcc <4 x i32> @vcreate_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
|
||||
; CHECK-LABEL: vcreate_i32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r3, r1
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r2, r0
|
||||
; CHECK-NEXT: vmov.32 q0[0], r1
|
||||
; CHECK-NEXT: vmov.32 q0[1], r0
|
||||
; CHECK-NEXT: vmov.32 q0[2], r3
|
||||
; CHECK-NEXT: vmov.32 q0[3], r2
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%conv = zext i32 %a to i64
|
||||
|
@ -25,8 +27,10 @@ entry:
|
|||
define arm_aapcs_vfpcc <4 x i32> @insert_0123(i32 %a, i32 %b, i32 %c, i32 %d) {
|
||||
; CHECK-LABEL: insert_0123:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, r0
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r1
|
||||
; CHECK-NEXT: vmov.32 q0[0], r0
|
||||
; CHECK-NEXT: vmov.32 q0[1], r1
|
||||
; CHECK-NEXT: vmov.32 q0[2], r2
|
||||
; CHECK-NEXT: vmov.32 q0[3], r3
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%v1 = insertelement <4 x i32> undef, i32 %a, i32 0
|
||||
|
@ -39,8 +43,10 @@ entry:
|
|||
define arm_aapcs_vfpcc <4 x i32> @insert_3210(i32 %a, i32 %b, i32 %c, i32 %d) {
|
||||
; CHECK-LABEL: insert_3210:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r1, r3
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r0, r2
|
||||
; CHECK-NEXT: vmov.32 q0[0], r3
|
||||
; CHECK-NEXT: vmov.32 q0[1], r2
|
||||
; CHECK-NEXT: vmov.32 q0[2], r1
|
||||
; CHECK-NEXT: vmov.32 q0[3], r0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%v1 = insertelement <4 x i32> undef, i32 %a, i32 3
|
||||
|
@ -53,8 +59,10 @@ entry:
|
|||
define arm_aapcs_vfpcc <4 x i32> @insert_0213(i32 %a, i32 %b, i32 %c, i32 %d) {
|
||||
; CHECK-LABEL: insert_0213:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
|
||||
; CHECK-NEXT: vmov.32 q0[0], r0
|
||||
; CHECK-NEXT: vmov.32 q0[1], r2
|
||||
; CHECK-NEXT: vmov.32 q0[2], r1
|
||||
; CHECK-NEXT: vmov.32 q0[3], r3
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%v1 = insertelement <4 x i32> undef, i32 %a, i32 0
|
||||
|
@ -67,7 +75,8 @@ entry:
|
|||
define arm_aapcs_vfpcc <4 x i32> @insert_0220(i32 %a, i32 %b, i32 %c, i32 %d) {
|
||||
; CHECK-LABEL: insert_0220:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, r3
|
||||
; CHECK-NEXT: vmov.32 q0[0], r3
|
||||
; CHECK-NEXT: vmov.32 q0[2], r2
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%v1 = insertelement <4 x i32> undef, i32 %a, i32 0
|
||||
|
@ -80,8 +89,9 @@ entry:
|
|||
define arm_aapcs_vfpcc <4 x i32> @insert_321(i32 %a, i32 %b, i32 %c, i32 %d) {
|
||||
; CHECK-LABEL: insert_321:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov.32 q0[1], r2
|
||||
; CHECK-NEXT: vmov.32 q0[2], r1
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r0, r2
|
||||
; CHECK-NEXT: vmov.32 q0[3], r0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%v1 = insertelement <4 x i32> undef, i32 %a, i32 3
|
||||
|
@ -94,7 +104,8 @@ define arm_aapcs_vfpcc <4 x i32> @insert_310(i32 %a, i32 %b, i32 %c, i32 %d) {
|
|||
; CHECK-LABEL: insert_310:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov.32 q0[0], r2
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r0, r1
|
||||
; CHECK-NEXT: vmov.32 q0[1], r1
|
||||
; CHECK-NEXT: vmov.32 q0[3], r0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%v1 = insertelement <4 x i32> undef, i32 %a, i32 3
|
||||
|
@ -106,7 +117,8 @@ entry:
|
|||
define arm_aapcs_vfpcc <4 x i32> @insert_320(i32 %a, i32 %b, i32 %c, i32 %d) {
|
||||
; CHECK-LABEL: insert_320:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r1, r2
|
||||
; CHECK-NEXT: vmov.32 q0[0], r2
|
||||
; CHECK-NEXT: vmov.32 q0[2], r1
|
||||
; CHECK-NEXT: vmov.32 q0[3], r0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
|
@ -119,7 +131,8 @@ entry:
|
|||
define arm_aapcs_vfpcc <4 x i32> @insert_31(i32 %a, i32 %b, i32 %c, i32 %d) {
|
||||
; CHECK-LABEL: insert_31:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r0, r1
|
||||
; CHECK-NEXT: vmov.32 q0[1], r1
|
||||
; CHECK-NEXT: vmov.32 q0[3], r0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%v1 = insertelement <4 x i32> undef, i32 %a, i32 3
|
||||
|
@ -152,8 +165,9 @@ entry:
|
|||
define arm_aapcs_vfpcc <4 x i32> @insert_210(i32 %a, i32 %b, i32 %c, i32 %d) {
|
||||
; CHECK-LABEL: insert_210:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov.32 q0[0], r2
|
||||
; CHECK-NEXT: vmov.32 q0[1], r1
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, r2
|
||||
; CHECK-NEXT: vmov.32 q0[2], r0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%v1 = insertelement <4 x i32> undef, i32 %a, i32 2
|
||||
|
@ -165,7 +179,8 @@ entry:
|
|||
define arm_aapcs_vfpcc <4 x i32> @insert_20(i32 %a, i32 %b, i32 %c, i32 %d) {
|
||||
; CHECK-LABEL: insert_20:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
|
||||
; CHECK-NEXT: vmov.32 q0[0], r1
|
||||
; CHECK-NEXT: vmov.32 q0[2], r0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%v1 = insertelement <4 x i32> undef, i32 %a, i32 2
|
||||
|
@ -230,26 +245,28 @@ entry:
|
|||
define hidden <8 x i16> @create_i16(i16 zeroext %a, i16 zeroext %b, i16 zeroext %c, i16 zeroext %d, i16 zeroext %a2, i16 zeroext %b2, i16 zeroext %c2, i16 zeroext %d2) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: create_i16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r5, r6, r7, lr}
|
||||
; CHECK-NEXT: push {r5, r6, r7, lr}
|
||||
; CHECK-NEXT: movs r7, #0
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: movs r5, #0
|
||||
; CHECK-NEXT: lsll r2, r7, #16
|
||||
; CHECK-NEXT: orr.w r0, r1, r0, lsl #16
|
||||
; CHECK-NEXT: orr.w r12, r2, r3
|
||||
; CHECK-NEXT: ldr r2, [sp, #24]
|
||||
; CHECK-NEXT: ldr r3, [sp, #28]
|
||||
; CHECK-NEXT: orrs r0, r7
|
||||
; CHECK-NEXT: lsll r2, r5, #16
|
||||
; CHECK-NEXT: orrs r2, r3
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, r12
|
||||
; CHECK-NEXT: ldrd r1, r2, [sp, #16]
|
||||
; CHECK-NEXT: orr.w r1, r2, r1, lsl #16
|
||||
; CHECK-NEXT: orrs r1, r5
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
|
||||
; CHECK-NEXT: ldrd lr, r4, [sp, #16]
|
||||
; CHECK-NEXT: orr.w r1, r2, r3
|
||||
; CHECK-NEXT: ldr.w r12, [sp, #24]
|
||||
; CHECK-NEXT: orrs r0, r5
|
||||
; CHECK-NEXT: vmov.32 q0[0], r1
|
||||
; CHECK-NEXT: movs r7, #0
|
||||
; CHECK-NEXT: vmov.32 q0[1], r0
|
||||
; CHECK-NEXT: ldr r0, [sp, #28]
|
||||
; CHECK-NEXT: lsll r12, r7, #16
|
||||
; CHECK-NEXT: orr.w r4, r4, lr, lsl #16
|
||||
; CHECK-NEXT: orr.w r0, r0, r12
|
||||
; CHECK-NEXT: orrs r7, r4
|
||||
; CHECK-NEXT: vmov.32 q0[2], r0
|
||||
; CHECK-NEXT: vmov.32 q0[3], r7
|
||||
; CHECK-NEXT: vmov r0, r1, d0
|
||||
; CHECK-NEXT: vmov r2, r3, d1
|
||||
; CHECK-NEXT: pop {r5, r6, r7, pc}
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
entry:
|
||||
%conv = zext i16 %a to i64
|
||||
%shl = shl nuw i64 %conv, 48
|
||||
|
@ -308,59 +325,59 @@ entry:
|
|||
define hidden <16 x i8> @create_i8(i8 zeroext %a1, i8 zeroext %b1, i8 zeroext %c1, i8 zeroext %d1, i8 zeroext %a2, i8 zeroext %b2, i8 zeroext %c2, i8 zeroext %d2, i8 zeroext %a3, i8 zeroext %b3, i8 zeroext %c3, i8 zeroext %d3, i8 zeroext %a4, i8 zeroext %b4, i8 zeroext %c4, i8 zeroext %d4) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: create_i8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
|
||||
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r11, lr}
|
||||
; CHECK-NEXT: ldr r4, [sp, #36]
|
||||
; CHECK-NEXT: .save {r4, r5, r7, r9, r11, lr}
|
||||
; CHECK-NEXT: push.w {r4, r5, r7, r9, r11, lr}
|
||||
; CHECK-NEXT: ldr.w r12, [sp, #28]
|
||||
; CHECK-NEXT: mov.w r11, #0
|
||||
; CHECK-NEXT: ldr r6, [sp, #32]
|
||||
; CHECK-NEXT: movs r7, #0
|
||||
; CHECK-NEXT: lsll r4, r11, #16
|
||||
; CHECK-NEXT: mov lr, r1
|
||||
; CHECK-NEXT: lsll r6, r7, #24
|
||||
; CHECK-NEXT: mov r12, r3
|
||||
; CHECK-NEXT: orr.w r1, r6, r4
|
||||
; CHECK-NEXT: ldr r4, [sp, #40]
|
||||
; CHECK-NEXT: movs r3, #0
|
||||
; CHECK-NEXT: ldr r6, [sp, #68]
|
||||
; CHECK-NEXT: lsll r4, r3, #8
|
||||
; CHECK-NEXT: ldr r4, [sp, #24]
|
||||
; CHECK-NEXT: movs r5, #0
|
||||
; CHECK-NEXT: orrs r1, r4
|
||||
; CHECK-NEXT: ldr r4, [sp, #44]
|
||||
; CHECK-NEXT: lsll r6, r5, #16
|
||||
; CHECK-NEXT: mov.w r9, #0
|
||||
; CHECK-NEXT: orr.w r8, r1, r4
|
||||
; CHECK-NEXT: ldr r4, [sp, #64]
|
||||
; CHECK-NEXT: lsll r12, r11, #16
|
||||
; CHECK-NEXT: lsls r1, r1, #16
|
||||
; CHECK-NEXT: lsll r4, r5, #24
|
||||
; CHECK-NEXT: orr.w r0, r1, r0, lsl #22
|
||||
; CHECK-NEXT: orr.w r12, r12, r4
|
||||
; CHECK-NEXT: ldr r4, [sp, #32]
|
||||
; CHECK-NEXT: movs r7, #0
|
||||
; CHECK-NEXT: orr.w r0, r0, r2, lsl #8
|
||||
; CHECK-NEXT: lsll r4, r7, #8
|
||||
; CHECK-NEXT: add r0, r3
|
||||
; CHECK-NEXT: orr.w r12, r12, r4
|
||||
; CHECK-NEXT: ldr r4, [sp, #36]
|
||||
; CHECK-NEXT: orrs r0, r5
|
||||
; CHECK-NEXT: ldr r2, [sp, #56]
|
||||
; CHECK-NEXT: orr.w r0, r0, r11
|
||||
; CHECK-NEXT: orr.w r4, r4, r12
|
||||
; CHECK-NEXT: vmov.32 q0[0], r4
|
||||
; CHECK-NEXT: orrs r0, r7
|
||||
; CHECK-NEXT: vmov.32 q0[1], r0
|
||||
; CHECK-NEXT: ldr r0, [sp, #60]
|
||||
; CHECK-NEXT: movs r1, #0
|
||||
; CHECK-NEXT: lsll r4, r1, #24
|
||||
; CHECK-NEXT: orrs r4, r6
|
||||
; CHECK-NEXT: ldr r6, [sp, #72]
|
||||
; CHECK-NEXT: lsll r6, r9, #8
|
||||
; CHECK-NEXT: orrs r4, r6
|
||||
; CHECK-NEXT: ldr r6, [sp, #76]
|
||||
; CHECK-NEXT: orrs r4, r6
|
||||
; CHECK-NEXT: lsl.w r6, lr, #16
|
||||
; CHECK-NEXT: orr.w r0, r6, r0, lsl #22
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r4, r8
|
||||
; CHECK-NEXT: movs r3, #0
|
||||
; CHECK-NEXT: lsll r0, r1, #16
|
||||
; CHECK-NEXT: lsll r2, r3, #24
|
||||
; CHECK-NEXT: orrs r0, r2
|
||||
; CHECK-NEXT: ldr r2, [sp, #64]
|
||||
; CHECK-NEXT: mov.w r9, #0
|
||||
; CHECK-NEXT: lsll r2, r9, #8
|
||||
; CHECK-NEXT: orrs r0, r2
|
||||
; CHECK-NEXT: ldr r2, [sp, #68]
|
||||
; CHECK-NEXT: orrs r0, r2
|
||||
; CHECK-NEXT: ldr r2, [sp, #40]
|
||||
; CHECK-NEXT: vmov.32 q0[2], r0
|
||||
; CHECK-NEXT: ldr r0, [sp, #44]
|
||||
; CHECK-NEXT: lsls r0, r0, #16
|
||||
; CHECK-NEXT: orr.w r0, r0, r2, lsl #22
|
||||
; CHECK-NEXT: ldr r2, [sp, #48]
|
||||
; CHECK-NEXT: orr.w r0, r0, r2, lsl #8
|
||||
; CHECK-NEXT: ldr r2, [sp, #52]
|
||||
; CHECK-NEXT: add r0, r12
|
||||
; CHECK-NEXT: orrs r0, r7
|
||||
; CHECK-NEXT: orr.w r0, r0, r11
|
||||
; CHECK-NEXT: lsls r2, r2, #16
|
||||
; CHECK-NEXT: add r0, r2
|
||||
; CHECK-NEXT: orrs r0, r3
|
||||
; CHECK-NEXT: ldr r3, [sp, #48]
|
||||
; CHECK-NEXT: orr.w r2, r2, r3, lsl #22
|
||||
; CHECK-NEXT: ldr r3, [sp, #56]
|
||||
; CHECK-NEXT: orr.w r2, r2, r3, lsl #8
|
||||
; CHECK-NEXT: ldr r3, [sp, #60]
|
||||
; CHECK-NEXT: add r2, r3
|
||||
; CHECK-NEXT: orrs r1, r2
|
||||
; CHECK-NEXT: orrs r1, r5
|
||||
; CHECK-NEXT: orr.w r1, r1, r9
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: orr.w r0, r0, r9
|
||||
; CHECK-NEXT: vmov.32 q0[3], r0
|
||||
; CHECK-NEXT: vmov r0, r1, d0
|
||||
; CHECK-NEXT: vmov r2, r3, d1
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r11, pc}
|
||||
; CHECK-NEXT: pop.w {r4, r5, r7, r9, r11, pc}
|
||||
entry:
|
||||
%conv = zext i8 %a1 to i64
|
||||
%shl = shl nuw nsw i64 %conv, 54
|
||||
|
|
|
@ -44,15 +44,17 @@ define arm_aapcs_vfpcc <4 x i32> @foo_int32_float(<4 x float> %src) {
|
|||
; CHECK-MVE-LABEL: foo_int32_float:
|
||||
; CHECK-MVE: @ %bb.0: @ %entry
|
||||
; CHECK-MVE-NEXT: vcvt.s32.f32 s4, s0
|
||||
; CHECK-MVE-NEXT: vcvt.s32.f32 s6, s2
|
||||
; CHECK-MVE-NEXT: vcvt.s32.f32 s8, s1
|
||||
; CHECK-MVE-NEXT: vcvt.s32.f32 s10, s3
|
||||
; CHECK-MVE-NEXT: vcvt.s32.f32 s6, s1
|
||||
; CHECK-MVE-NEXT: vcvt.s32.f32 s10, s2
|
||||
; CHECK-MVE-NEXT: vcvt.s32.f32 s8, s3
|
||||
; CHECK-MVE-NEXT: vmov r0, s4
|
||||
; CHECK-MVE-NEXT: vmov r1, s6
|
||||
; CHECK-MVE-NEXT: vmov q0[2], q0[0], r1, r0
|
||||
; CHECK-MVE-NEXT: vmov.32 q0[0], r0
|
||||
; CHECK-MVE-NEXT: vmov r0, s6
|
||||
; CHECK-MVE-NEXT: vmov.32 q0[1], r0
|
||||
; CHECK-MVE-NEXT: vmov r0, s10
|
||||
; CHECK-MVE-NEXT: vmov.32 q0[2], r0
|
||||
; CHECK-MVE-NEXT: vmov r0, s8
|
||||
; CHECK-MVE-NEXT: vmov r1, s10
|
||||
; CHECK-MVE-NEXT: vmov q0[3], q0[1], r1, r0
|
||||
; CHECK-MVE-NEXT: vmov.32 q0[3], r0
|
||||
; CHECK-MVE-NEXT: bx lr
|
||||
;
|
||||
; CHECK-MVEFP-LABEL: foo_int32_float:
|
||||
|
@ -68,15 +70,17 @@ define arm_aapcs_vfpcc <4 x i32> @foo_uint32_float(<4 x float> %src) {
|
|||
; CHECK-MVE-LABEL: foo_uint32_float:
|
||||
; CHECK-MVE: @ %bb.0: @ %entry
|
||||
; CHECK-MVE-NEXT: vcvt.u32.f32 s4, s0
|
||||
; CHECK-MVE-NEXT: vcvt.u32.f32 s6, s2
|
||||
; CHECK-MVE-NEXT: vcvt.u32.f32 s8, s1
|
||||
; CHECK-MVE-NEXT: vcvt.u32.f32 s10, s3
|
||||
; CHECK-MVE-NEXT: vcvt.u32.f32 s6, s1
|
||||
; CHECK-MVE-NEXT: vcvt.u32.f32 s10, s2
|
||||
; CHECK-MVE-NEXT: vcvt.u32.f32 s8, s3
|
||||
; CHECK-MVE-NEXT: vmov r0, s4
|
||||
; CHECK-MVE-NEXT: vmov r1, s6
|
||||
; CHECK-MVE-NEXT: vmov q0[2], q0[0], r1, r0
|
||||
; CHECK-MVE-NEXT: vmov.32 q0[0], r0
|
||||
; CHECK-MVE-NEXT: vmov r0, s6
|
||||
; CHECK-MVE-NEXT: vmov.32 q0[1], r0
|
||||
; CHECK-MVE-NEXT: vmov r0, s10
|
||||
; CHECK-MVE-NEXT: vmov.32 q0[2], r0
|
||||
; CHECK-MVE-NEXT: vmov r0, s8
|
||||
; CHECK-MVE-NEXT: vmov r1, s10
|
||||
; CHECK-MVE-NEXT: vmov q0[3], q0[1], r1, r0
|
||||
; CHECK-MVE-NEXT: vmov.32 q0[3], r0
|
||||
; CHECK-MVE-NEXT: bx lr
|
||||
;
|
||||
; CHECK-MVEFP-LABEL: foo_uint32_float:
|
||||
|
@ -345,21 +349,24 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @foo_int64_float(<2 x double> %src) {
|
||||
; CHECK-LABEL: foo_int64_float:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vmov q4, q0
|
||||
; CHECK-NEXT: vmov r0, r1, d8
|
||||
; CHECK-NEXT: bl __aeabi_d2lz
|
||||
; CHECK-NEXT: mov r4, r0
|
||||
; CHECK-NEXT: mov r5, r1
|
||||
; CHECK-NEXT: vmov r0, r1, d9
|
||||
; CHECK-NEXT: vmov r2, r3, d9
|
||||
; CHECK-NEXT: vmov.32 q4[0], r0
|
||||
; CHECK-NEXT: vmov.32 q4[1], r1
|
||||
; CHECK-NEXT: mov r0, r2
|
||||
; CHECK-NEXT: mov r1, r3
|
||||
; CHECK-NEXT: bl __aeabi_d2lz
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, r4
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r5
|
||||
; CHECK-NEXT: vmov.32 q4[2], r0
|
||||
; CHECK-NEXT: vmov.32 q4[3], r1
|
||||
; CHECK-NEXT: vmov q0, q4
|
||||
; CHECK-NEXT: vpop {d8, d9}
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
entry:
|
||||
%out = fptosi <2 x double> %src to <2 x i64>
|
||||
ret <2 x i64> %out
|
||||
|
@ -368,21 +375,24 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @foo_uint64_float(<2 x double> %src) {
|
||||
; CHECK-LABEL: foo_uint64_float:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vmov q4, q0
|
||||
; CHECK-NEXT: vmov r0, r1, d8
|
||||
; CHECK-NEXT: bl __aeabi_d2ulz
|
||||
; CHECK-NEXT: mov r4, r0
|
||||
; CHECK-NEXT: mov r5, r1
|
||||
; CHECK-NEXT: vmov r0, r1, d9
|
||||
; CHECK-NEXT: vmov r2, r3, d9
|
||||
; CHECK-NEXT: vmov.32 q4[0], r0
|
||||
; CHECK-NEXT: vmov.32 q4[1], r1
|
||||
; CHECK-NEXT: mov r0, r2
|
||||
; CHECK-NEXT: mov r1, r3
|
||||
; CHECK-NEXT: bl __aeabi_d2ulz
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, r4
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r5
|
||||
; CHECK-NEXT: vmov.32 q4[2], r0
|
||||
; CHECK-NEXT: vmov.32 q4[3], r1
|
||||
; CHECK-NEXT: vmov q0, q4
|
||||
; CHECK-NEXT: vpop {d8, d9}
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
entry:
|
||||
%out = fptoui <2 x double> %src to <2 x i64>
|
||||
ret <2 x i64> %out
|
||||
|
|
|
@ -38,8 +38,10 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @vdup_i64(i64 %src) {
|
||||
; CHECK-LABEL: vdup_i64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, r0
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r1
|
||||
; CHECK-NEXT: vmov.32 q0[0], r0
|
||||
; CHECK-NEXT: vmov.32 q0[1], r1
|
||||
; CHECK-NEXT: vmov.32 q0[2], r0
|
||||
; CHECK-NEXT: vmov.32 q0[3], r1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = insertelement <2 x i64> undef, i64 %src, i32 0
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -76,20 +76,22 @@ define <4 x i64> *@vld2_v2i64(<4 x i64> *%src, <2 x i64> *%dst) {
|
|||
; CHECK-NEXT: vmov.f32 s2, s8
|
||||
; CHECK-NEXT: vmov.f32 s7, s11
|
||||
; CHECK-NEXT: vmov.f32 s3, s9
|
||||
; CHECK-NEXT: vmov r2, s4
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r12, s5
|
||||
; CHECK-NEXT: vmov lr, s1
|
||||
; CHECK-NEXT: vmov r4, s6
|
||||
; CHECK-NEXT: vmov r5, s2
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: vmov r4, s4
|
||||
; CHECK-NEXT: vmov r5, s0
|
||||
; CHECK-NEXT: vmov r12, s7
|
||||
; CHECK-NEXT: vmov lr, s3
|
||||
; CHECK-NEXT: adds r6, r3, r2
|
||||
; CHECK-NEXT: vmov r3, s7
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: vmov r3, s5
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: adc.w r12, r12, lr
|
||||
; CHECK-NEXT: adds r5, r5, r4
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r5, r6
|
||||
; CHECK-NEXT: vmov.32 q0[0], r5
|
||||
; CHECK-NEXT: adcs r2, r3
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r2, r12
|
||||
; CHECK-NEXT: vmov.32 q0[1], r2
|
||||
; CHECK-NEXT: vmov.32 q0[2], r6
|
||||
; CHECK-NEXT: vmov.32 q0[3], r12
|
||||
; CHECK-NEXT: vstrw.32 q0, [r1]
|
||||
; CHECK-NEXT: pop {r4, r5, r6, pc}
|
||||
entry:
|
||||
|
|
|
@ -269,20 +269,22 @@ define void @vld2_v2i64(<4 x i64> *%src, <2 x i64> *%dst) {
|
|||
; CHECK-NEXT: vmov.f32 s2, s8
|
||||
; CHECK-NEXT: vmov.f32 s3, s9
|
||||
; CHECK-NEXT: vmov.f32 s7, s11
|
||||
; CHECK-NEXT: vmov r3, s4
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: vmov r12, s5
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: vmov r4, s2
|
||||
; CHECK-NEXT: adds.w lr, r0, r3
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov r3, s7
|
||||
; CHECK-NEXT: adc.w r12, r12, r2
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: vmov r4, s0
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: vmov r12, s7
|
||||
; CHECK-NEXT: adds.w lr, r0, r3
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r3, s5
|
||||
; CHECK-NEXT: adc.w r12, r12, r2
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: adds r0, r0, r4
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, lr
|
||||
; CHECK-NEXT: vmov.32 q0[0], r0
|
||||
; CHECK-NEXT: adcs r2, r3
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r2, r12
|
||||
; CHECK-NEXT: vmov.32 q0[1], r2
|
||||
; CHECK-NEXT: vmov.32 q0[2], lr
|
||||
; CHECK-NEXT: vmov.32 q0[3], r12
|
||||
; CHECK-NEXT: vstrw.32 q0, [r1]
|
||||
; CHECK-NEXT: pop {r4, pc}
|
||||
entry:
|
||||
|
@ -297,58 +299,62 @@ entry:
|
|||
define void @vld2_v4i64(<8 x i64> *%src, <4 x i64> *%dst) {
|
||||
; CHECK-LABEL: vld2_v4i64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: .save {r4, lr}
|
||||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vldrw.u32 q3, [r0, #32]
|
||||
; CHECK-NEXT: vldrw.u32 q5, [r0, #48]
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vldrw.u32 q5, [r0, #16]
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0, #32]
|
||||
; CHECK-NEXT: vldrw.u32 q4, [r0, #48]
|
||||
; CHECK-NEXT: vmov.f64 d4, d1
|
||||
; CHECK-NEXT: vmov.f32 s9, s3
|
||||
; CHECK-NEXT: vmov.f32 s10, s22
|
||||
; CHECK-NEXT: vmov.f32 s2, s20
|
||||
; CHECK-NEXT: vmov.f32 s11, s23
|
||||
; CHECK-NEXT: vmov.f32 s3, s21
|
||||
; CHECK-NEXT: vmov r3, s8
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: vmov r12, s9
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: vmov.f64 d6, d3
|
||||
; CHECK-NEXT: vmov.f32 s13, s7
|
||||
; CHECK-NEXT: vmov.f32 s14, s18
|
||||
; CHECK-NEXT: vmov.f32 s6, s16
|
||||
; CHECK-NEXT: vmov.f32 s7, s17
|
||||
; CHECK-NEXT: vmov.f32 s15, s19
|
||||
; CHECK-NEXT: vmov r4, s4
|
||||
; CHECK-NEXT: vmov r5, s14
|
||||
; CHECK-NEXT: vmov r6, s6
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0, #16]
|
||||
; CHECK-NEXT: vmov.f64 d8, d7
|
||||
; CHECK-NEXT: vmov.f32 s17, s15
|
||||
; CHECK-NEXT: vmov.f32 s18, s22
|
||||
; CHECK-NEXT: vmov.f32 s14, s20
|
||||
; CHECK-NEXT: vmov.f32 s15, s21
|
||||
; CHECK-NEXT: vmov.f32 s19, s23
|
||||
; CHECK-NEXT: vmov r3, s18
|
||||
; CHECK-NEXT: vmov r0, s14
|
||||
; CHECK-NEXT: vmov r4, s12
|
||||
; CHECK-NEXT: vmov.f64 d2, d1
|
||||
; CHECK-NEXT: vmov r12, s19
|
||||
; CHECK-NEXT: vmov r2, s15
|
||||
; CHECK-NEXT: vmov.f32 s5, s3
|
||||
; CHECK-NEXT: vmov.f32 s6, s10
|
||||
; CHECK-NEXT: vmov.f32 s2, s8
|
||||
; CHECK-NEXT: vmov.f32 s7, s11
|
||||
; CHECK-NEXT: vmov.f32 s3, s9
|
||||
; CHECK-NEXT: adds.w lr, r0, r3
|
||||
; CHECK-NEXT: vmov r3, s12
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov r0, s16
|
||||
; CHECK-NEXT: vmov r3, s17
|
||||
; CHECK-NEXT: adc.w r12, r12, r2
|
||||
; CHECK-NEXT: vmov r2, s13
|
||||
; CHECK-NEXT: adds r3, r3, r4
|
||||
; CHECK-NEXT: vmov r4, s7
|
||||
; CHECK-NEXT: adcs r0, r2
|
||||
; CHECK-NEXT: vmov r2, s15
|
||||
; CHECK-NEXT: adds r5, r5, r6
|
||||
; CHECK-NEXT: vmov r6, s2
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r5, r3
|
||||
; CHECK-NEXT: adcs r2, r4
|
||||
; CHECK-NEXT: vmov r4, s10
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r2, r0
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: adds r0, r0, r4
|
||||
; CHECK-NEXT: vmov r4, s2
|
||||
; CHECK-NEXT: vmov.32 q3[0], r0
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: adcs r2, r3
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: vmov.32 q3[1], r2
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: vmov.32 q3[2], lr
|
||||
; CHECK-NEXT: vmov.32 q3[3], r12
|
||||
; CHECK-NEXT: vstrw.32 q3, [r1, #16]
|
||||
; CHECK-NEXT: adds r4, r4, r6
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r4, lr
|
||||
; CHECK-NEXT: adcs r0, r2
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r0, r12
|
||||
; CHECK-NEXT: vstrw.32 q1, [r1]
|
||||
; CHECK-NEXT: adds.w lr, r4, r3
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: adc.w r12, r2, r0
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov r2, s5
|
||||
; CHECK-NEXT: vmov r4, s1
|
||||
; CHECK-NEXT: adds r0, r0, r3
|
||||
; CHECK-NEXT: vmov.32 q0[0], r0
|
||||
; CHECK-NEXT: adcs r2, r4
|
||||
; CHECK-NEXT: vmov.32 q0[1], r2
|
||||
; CHECK-NEXT: vmov.32 q0[2], lr
|
||||
; CHECK-NEXT: vmov.32 q0[3], r12
|
||||
; CHECK-NEXT: vstrw.32 q0, [r1]
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: pop {r4, r5, r6, pc}
|
||||
; CHECK-NEXT: pop {r4, pc}
|
||||
entry:
|
||||
%l1 = load <8 x i64>, <8 x i64>* %src, align 4
|
||||
%s1 = shufflevector <8 x i64> %l1, <8 x i64> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -119,41 +119,43 @@ define <8 x i64> *@vld4_v2i64(<8 x i64> *%src, <2 x i64> *%dst) {
|
|||
; CHECK-NEXT: vmov.f32 s14, s20
|
||||
; CHECK-NEXT: vmov.f32 s19, s23
|
||||
; CHECK-NEXT: vmov.f32 s15, s21
|
||||
; CHECK-NEXT: vmov r2, s16
|
||||
; CHECK-NEXT: vmov r3, s12
|
||||
; CHECK-NEXT: vmov r2, s18
|
||||
; CHECK-NEXT: vmov r3, s14
|
||||
; CHECK-NEXT: vmov.f32 s5, s3
|
||||
; CHECK-NEXT: vmov.f32 s6, s10
|
||||
; CHECK-NEXT: vmov.f32 s2, s8
|
||||
; CHECK-NEXT: vmov.f32 s3, s9
|
||||
; CHECK-NEXT: vmov.f32 s7, s11
|
||||
; CHECK-NEXT: vmov r12, s17
|
||||
; CHECK-NEXT: vmov lr, s13
|
||||
; CHECK-NEXT: vmov r4, s4
|
||||
; CHECK-NEXT: vmov r5, s0
|
||||
; CHECK-NEXT: vmov r7, s2
|
||||
; CHECK-NEXT: vmov r12, s19
|
||||
; CHECK-NEXT: vmov lr, s15
|
||||
; CHECK-NEXT: vmov r4, s6
|
||||
; CHECK-NEXT: vmov r5, s2
|
||||
; CHECK-NEXT: vmov r7, s0
|
||||
; CHECK-NEXT: adds r6, r3, r2
|
||||
; CHECK-NEXT: vmov r2, s5
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: vmov r2, s7
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: adc.w r12, r12, lr
|
||||
; CHECK-NEXT: adds r5, r5, r4
|
||||
; CHECK-NEXT: vmov r4, s18
|
||||
; CHECK-NEXT: vmov r4, s16
|
||||
; CHECK-NEXT: adcs r2, r3
|
||||
; CHECK-NEXT: adds.w lr, r5, r6
|
||||
; CHECK-NEXT: adc.w r12, r12, r2
|
||||
; CHECK-NEXT: vmov r2, s14
|
||||
; CHECK-NEXT: vmov r6, s19
|
||||
; CHECK-NEXT: vmov r5, s15
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: vmov r2, s12
|
||||
; CHECK-NEXT: vmov r6, s17
|
||||
; CHECK-NEXT: vmov r5, s13
|
||||
; CHECK-NEXT: vmov r3, s4
|
||||
; CHECK-NEXT: adds r2, r2, r4
|
||||
; CHECK-NEXT: vmov r4, s3
|
||||
; CHECK-NEXT: vmov r4, s1
|
||||
; CHECK-NEXT: adcs r6, r5
|
||||
; CHECK-NEXT: vmov r5, s7
|
||||
; CHECK-NEXT: vmov r5, s5
|
||||
; CHECK-NEXT: adds r3, r3, r7
|
||||
; CHECK-NEXT: adcs r4, r5
|
||||
; CHECK-NEXT: adds r2, r2, r3
|
||||
; CHECK-NEXT: adc.w r3, r4, r6
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, lr
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r12
|
||||
; CHECK-NEXT: vmov.32 q0[0], r2
|
||||
; CHECK-NEXT: vmov.32 q0[1], r3
|
||||
; CHECK-NEXT: vmov.32 q0[2], lr
|
||||
; CHECK-NEXT: vmov.32 q0[3], r12
|
||||
; CHECK-NEXT: vstrw.32 q0, [r1]
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: add sp, #4
|
||||
|
|
|
@ -229,33 +229,41 @@ define void @vld4_v4i16(<16 x i16> *%src, <4 x i16> *%dst) {
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0, #16]
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[3]
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[3]
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r0, r2
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[7]
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0, #16]
|
||||
; CHECK-NEXT: vmov.u16 r2, q1[3]
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[3]
|
||||
; CHECK-NEXT: vmov.32 q2[0], r2
|
||||
; CHECK-NEXT: vmov.u16 r2, q1[7]
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r2, r0
|
||||
; CHECK-NEXT: vmov.32 q2[1], r2
|
||||
; CHECK-NEXT: vmov.32 q2[2], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[7]
|
||||
; CHECK-NEXT: vmov.32 q2[3], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[2]
|
||||
; CHECK-NEXT: vmov.32 q3[0], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[6]
|
||||
; CHECK-NEXT: vmov.32 q3[1], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[2]
|
||||
; CHECK-NEXT: vmov.u16 r2, q1[2]
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r2, r0
|
||||
; CHECK-NEXT: vmov.32 q3[2], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[6]
|
||||
; CHECK-NEXT: vmov.u16 r2, q1[6]
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r2, r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[0]
|
||||
; CHECK-NEXT: vmov.u16 r2, q1[0]
|
||||
; CHECK-NEXT: vmov.32 q3[3], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[0]
|
||||
; CHECK-NEXT: vadd.i32 q2, q3, q2
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r2, r0
|
||||
; CHECK-NEXT: vmov.32 q3[0], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[4]
|
||||
; CHECK-NEXT: vmov.32 q3[1], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[0]
|
||||
; CHECK-NEXT: vmov.32 q3[2], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[1]
|
||||
; CHECK-NEXT: vmov.32 q4[0], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[5]
|
||||
; CHECK-NEXT: vmov.32 q4[1], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[1]
|
||||
; CHECK-NEXT: vmov.u16 r2, q1[1]
|
||||
; CHECK-NEXT: vmov q4[2], q4[0], r2, r0
|
||||
; CHECK-NEXT: vmov.32 q4[2], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[5]
|
||||
; CHECK-NEXT: vmov.u16 r2, q1[5]
|
||||
; CHECK-NEXT: vmov q4[3], q4[1], r2, r0
|
||||
; CHECK-NEXT: vmov.32 q4[3], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[4]
|
||||
; CHECK-NEXT: vmov.u16 r2, q1[4]
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r2, r0
|
||||
; CHECK-NEXT: vmov.32 q3[3], r0
|
||||
; CHECK-NEXT: vadd.i32 q0, q3, q4
|
||||
; CHECK-NEXT: vadd.i32 q0, q0, q2
|
||||
; CHECK-NEXT: vstrh.32 q0, [r1]
|
||||
|
@ -382,12 +390,14 @@ define void @vld4_v4i8(<16 x i8> *%src, <4 x i8> *%dst) {
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[2]
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[10]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r2, r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[6]
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[14]
|
||||
; CHECK-NEXT: vrev32.8 q2, q0
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r2, r0
|
||||
; CHECK-NEXT: vmov.32 q1[0], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[6]
|
||||
; CHECK-NEXT: vmov.32 q1[1], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[10]
|
||||
; CHECK-NEXT: vmov.32 q1[2], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[14]
|
||||
; CHECK-NEXT: vmov.32 q1[3], r0
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, q2
|
||||
; CHECK-NEXT: vrev16.8 q2, q0
|
||||
; CHECK-NEXT: vadd.i32 q0, q0, q2
|
||||
|
@ -545,42 +555,44 @@ define void @vld4_v2i64(<8 x i64> *%src, <2 x i64> *%dst) {
|
|||
; CHECK-NEXT: vmov.f32 s14, s20
|
||||
; CHECK-NEXT: vmov.f32 s19, s23
|
||||
; CHECK-NEXT: vmov.f32 s15, s21
|
||||
; CHECK-NEXT: vmov r3, s16
|
||||
; CHECK-NEXT: vmov r0, s12
|
||||
; CHECK-NEXT: vmov r12, s17
|
||||
; CHECK-NEXT: vmov r2, s13
|
||||
; CHECK-NEXT: vmov r3, s18
|
||||
; CHECK-NEXT: vmov r0, s14
|
||||
; CHECK-NEXT: vmov.f64 d2, d1
|
||||
; CHECK-NEXT: vmov r12, s19
|
||||
; CHECK-NEXT: vmov r2, s15
|
||||
; CHECK-NEXT: vmov.f32 s5, s3
|
||||
; CHECK-NEXT: vmov.f32 s6, s10
|
||||
; CHECK-NEXT: vmov.f32 s2, s8
|
||||
; CHECK-NEXT: vmov.f32 s3, s9
|
||||
; CHECK-NEXT: vmov.f32 s7, s11
|
||||
; CHECK-NEXT: vmov r4, s0
|
||||
; CHECK-NEXT: vmov r5, s6
|
||||
; CHECK-NEXT: vmov r6, s2
|
||||
; CHECK-NEXT: vmov r4, s2
|
||||
; CHECK-NEXT: vmov r5, s4
|
||||
; CHECK-NEXT: vmov r6, s0
|
||||
; CHECK-NEXT: adds.w lr, r0, r3
|
||||
; CHECK-NEXT: vmov r3, s5
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: vmov r3, s7
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: adc.w r12, r12, r2
|
||||
; CHECK-NEXT: vmov r2, s4
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: adds r2, r2, r4
|
||||
; CHECK-NEXT: vmov r4, s15
|
||||
; CHECK-NEXT: vmov r4, s13
|
||||
; CHECK-NEXT: adcs r0, r3
|
||||
; CHECK-NEXT: adds.w lr, lr, r2
|
||||
; CHECK-NEXT: adc.w r12, r12, r0
|
||||
; CHECK-NEXT: vmov r0, s18
|
||||
; CHECK-NEXT: vmov r2, s14
|
||||
; CHECK-NEXT: vmov r3, s19
|
||||
; CHECK-NEXT: vmov r0, s16
|
||||
; CHECK-NEXT: vmov r2, s12
|
||||
; CHECK-NEXT: vmov r3, s17
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adc.w r2, r4, r3
|
||||
; CHECK-NEXT: vmov r3, s7
|
||||
; CHECK-NEXT: vmov r4, s3
|
||||
; CHECK-NEXT: vmov r3, s5
|
||||
; CHECK-NEXT: vmov r4, s1
|
||||
; CHECK-NEXT: adds r5, r5, r6
|
||||
; CHECK-NEXT: adcs r3, r4
|
||||
; CHECK-NEXT: adds r0, r0, r5
|
||||
; CHECK-NEXT: adcs r2, r3
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, lr
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r2, r12
|
||||
; CHECK-NEXT: vmov.32 q0[0], r0
|
||||
; CHECK-NEXT: vmov.32 q0[1], r2
|
||||
; CHECK-NEXT: vmov.32 q0[2], lr
|
||||
; CHECK-NEXT: vmov.32 q0[3], r12
|
||||
; CHECK-NEXT: vstrw.32 q0, [r1]
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: pop {r4, r5, r6, pc}
|
||||
|
@ -600,118 +612,123 @@ entry:
|
|||
define void @vld4_v4i64(<16 x i64> *%src, <4 x i64> *%dst) {
|
||||
; CHECK-LABEL: vld4_v4i64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
|
||||
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr}
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
|
||||
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
|
||||
; CHECK-NEXT: .pad #64
|
||||
; CHECK-NEXT: sub sp, #64
|
||||
; CHECK-NEXT: .pad #72
|
||||
; CHECK-NEXT: sub sp, #72
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0, #64]
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0, #96]
|
||||
; CHECK-NEXT: vldrw.u32 q4, [r0, #80]
|
||||
; CHECK-NEXT: vldrw.u32 q6, [r0, #16]
|
||||
; CHECK-NEXT: vmov.f64 d4, d3
|
||||
; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill
|
||||
; CHECK-NEXT: vldrw.u32 q3, [r0]
|
||||
; CHECK-NEXT: vstrw.32 q1, [sp, #32] @ 16-byte Spill
|
||||
; CHECK-NEXT: vmov.f32 s9, s7
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0, #32]
|
||||
; CHECK-NEXT: vmov.f32 s10, s2
|
||||
; CHECK-NEXT: vmov.f32 s11, s3
|
||||
; CHECK-NEXT: vldrw.u32 q5, [r0, #80]
|
||||
; CHECK-NEXT: vldrw.u32 q7, [r0, #16]
|
||||
; CHECK-NEXT: vmov.f64 d8, d3
|
||||
; CHECK-NEXT: vstrw.32 q0, [sp, #32] @ 16-byte Spill
|
||||
; CHECK-NEXT: vstrw.32 q1, [sp, #48] @ 16-byte Spill
|
||||
; CHECK-NEXT: vmov.f32 s17, s7
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
||||
; CHECK-NEXT: vmov.f32 s18, s2
|
||||
; CHECK-NEXT: vmov.f32 s19, s3
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0, #112]
|
||||
; CHECK-NEXT: vmov.f64 d14, d9
|
||||
; CHECK-NEXT: vstrw.32 q2, [sp, #48] @ 16-byte Spill
|
||||
; CHECK-NEXT: vmov.f64 d12, d11
|
||||
; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill
|
||||
; CHECK-NEXT: vmov.f32 s25, s23
|
||||
; CHECK-NEXT: vmov.f32 s26, s2
|
||||
; CHECK-NEXT: vmov.f64 d6, d3
|
||||
; CHECK-NEXT: vmov.f32 s27, s3
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0, #32]
|
||||
; CHECK-NEXT: vmov.f32 s13, s7
|
||||
; CHECK-NEXT: vmov.f32 s14, s2
|
||||
; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill
|
||||
; CHECK-NEXT: vmov.f32 s29, s19
|
||||
; CHECK-NEXT: vmov.f32 s30, s2
|
||||
; CHECK-NEXT: vmov.f64 d4, d13
|
||||
; CHECK-NEXT: vmov.f32 s31, s3
|
||||
; CHECK-NEXT: vmov.f64 d4, d15
|
||||
; CHECK-NEXT: vmov.f32 s15, s3
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0, #48]
|
||||
; CHECK-NEXT: vmov.f32 s9, s27
|
||||
; CHECK-NEXT: vmov.f32 s9, s31
|
||||
; CHECK-NEXT: vmov.f32 s10, s2
|
||||
; CHECK-NEXT: vmov.f32 s26, s0
|
||||
; CHECK-NEXT: vmov.f32 s30, s0
|
||||
; CHECK-NEXT: vmov.f32 s11, s3
|
||||
; CHECK-NEXT: vmov.f32 s27, s1
|
||||
; CHECK-NEXT: vmov.f32 s31, s1
|
||||
; CHECK-NEXT: vldrw.u32 q0, [sp] @ 16-byte Reload
|
||||
; CHECK-NEXT: vmov r3, s8
|
||||
; CHECK-NEXT: vmov r0, s24
|
||||
; CHECK-NEXT: vmov r12, s9
|
||||
; CHECK-NEXT: vmov r2, s25
|
||||
; CHECK-NEXT: vmov.f64 d10, d7
|
||||
; CHECK-NEXT: vmov.f32 s21, s15
|
||||
; CHECK-NEXT: vmov.f32 s22, s6
|
||||
; CHECK-NEXT: vmov.f32 s14, s4
|
||||
; CHECK-NEXT: vmov.f32 s15, s5
|
||||
; CHECK-NEXT: vmov.f32 s23, s7
|
||||
; CHECK-NEXT: vldrw.u32 q1, [sp, #16] @ 16-byte Reload
|
||||
; CHECK-NEXT: vmov r4, s12
|
||||
; CHECK-NEXT: vmov.f32 s18, s0
|
||||
; CHECK-NEXT: vmov.f32 s19, s1
|
||||
; CHECK-NEXT: vmov r3, s10
|
||||
; CHECK-NEXT: vmov r0, s30
|
||||
; CHECK-NEXT: vmov.f32 s6, s0
|
||||
; CHECK-NEXT: vmov.f32 s7, s1
|
||||
; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload
|
||||
; CHECK-NEXT: vmov r4, s6
|
||||
; CHECK-NEXT: vstrw.32 q1, [sp] @ 16-byte Spill
|
||||
; CHECK-NEXT: vmov r12, s11
|
||||
; CHECK-NEXT: vmov r2, s31
|
||||
; CHECK-NEXT: vmov.f32 s22, s0
|
||||
; CHECK-NEXT: vmov.f32 s23, s1
|
||||
; CHECK-NEXT: vldrw.u32 q0, [sp, #48] @ 16-byte Reload
|
||||
; CHECK-NEXT: vmov r5, s18
|
||||
; CHECK-NEXT: vmov r7, s16
|
||||
; CHECK-NEXT: adds.w lr, r0, r3
|
||||
; CHECK-NEXT: vmov r3, s20
|
||||
; CHECK-NEXT: vmov r0, s13
|
||||
; CHECK-NEXT: adc.w r12, r12, r2
|
||||
; CHECK-NEXT: vmov r2, s21
|
||||
; CHECK-NEXT: adds r3, r3, r4
|
||||
; CHECK-NEXT: vmov r4, s30
|
||||
; CHECK-NEXT: adcs r0, r2
|
||||
; CHECK-NEXT: adds.w lr, lr, r3
|
||||
; CHECK-NEXT: vmov r2, s18
|
||||
; CHECK-NEXT: adc.w r12, r12, r0
|
||||
; CHECK-NEXT: vmov r0, s31
|
||||
; CHECK-NEXT: vmov r3, s19
|
||||
; CHECK-NEXT: adds r2, r2, r4
|
||||
; CHECK-NEXT: adcs r3, r0
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vldrw.u32 q0, [sp, #32] @ 16-byte Reload
|
||||
; CHECK-NEXT: vmov r3, s14
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vldrw.u32 q1, [sp, #32] @ 16-byte Reload
|
||||
; CHECK-NEXT: vmov.f32 s2, s4
|
||||
; CHECK-NEXT: vmov.f32 s3, s5
|
||||
; CHECK-NEXT: vldrw.u32 q1, [sp, #48] @ 16-byte Reload
|
||||
; CHECK-NEXT: vldrw.u32 q1, [sp] @ 16-byte Reload
|
||||
; CHECK-NEXT: adc.w r12, r12, r2
|
||||
; CHECK-NEXT: vmov r2, s15
|
||||
; CHECK-NEXT: vmov r6, s2
|
||||
; CHECK-NEXT: vmov r5, s6
|
||||
; CHECK-NEXT: vmov r7, s4
|
||||
; CHECK-NEXT: adds r3, r3, r4
|
||||
; CHECK-NEXT: vmov r4, s23
|
||||
; CHECK-NEXT: adcs r0, r2
|
||||
; CHECK-NEXT: adds.w lr, lr, r3
|
||||
; CHECK-NEXT: adc.w r12, r12, r0
|
||||
; CHECK-NEXT: vmov r0, s26
|
||||
; CHECK-NEXT: vmov r2, s22
|
||||
; CHECK-NEXT: vmov r3, s27
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adc.w r2, r4, r3
|
||||
; CHECK-NEXT: vmov r3, s19
|
||||
; CHECK-NEXT: vmov r4, s3
|
||||
; CHECK-NEXT: adds r5, r5, r6
|
||||
; CHECK-NEXT: vmov r6, s16
|
||||
; CHECK-NEXT: adcs r4, r0
|
||||
; CHECK-NEXT: adds.w r9, r5, r2
|
||||
; CHECK-NEXT: vmov r5, s28
|
||||
; CHECK-NEXT: adc.w r8, r4, r3
|
||||
; CHECK-NEXT: vmov r2, s29
|
||||
; CHECK-NEXT: vmov r4, s17
|
||||
; CHECK-NEXT: vmov r6, s20
|
||||
; CHECK-NEXT: adcs r3, r4
|
||||
; CHECK-NEXT: adds r0, r0, r5
|
||||
; CHECK-NEXT: vmov r5, s24
|
||||
; CHECK-NEXT: adc.w r8, r3, r2
|
||||
; CHECK-NEXT: vmov r2, s25
|
||||
; CHECK-NEXT: vmov r4, s21
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r0, s10
|
||||
; CHECK-NEXT: adds r5, r5, r6
|
||||
; CHECK-NEXT: vmov r6, s1
|
||||
; CHECK-NEXT: adcs r2, r4
|
||||
; CHECK-NEXT: vmov r4, s5
|
||||
; CHECK-NEXT: vmov r4, s17
|
||||
; CHECK-NEXT: adds r3, r3, r7
|
||||
; CHECK-NEXT: vmov r7, s14
|
||||
; CHECK-NEXT: vmov r7, s28
|
||||
; CHECK-NEXT: adcs r4, r6
|
||||
; CHECK-NEXT: adds r3, r3, r5
|
||||
; CHECK-NEXT: vmov r6, s22
|
||||
; CHECK-NEXT: adc.w r10, r4, r2
|
||||
; CHECK-NEXT: vmov r4, s23
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r9, r3
|
||||
; CHECK-NEXT: vmov r5, s15
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r8, r10
|
||||
; CHECK-NEXT: vmov r2, s26
|
||||
; CHECK-NEXT: vstrw.32 q1, [r1, #16]
|
||||
; CHECK-NEXT: vmov r6, s8
|
||||
; CHECK-NEXT: adcs r2, r4
|
||||
; CHECK-NEXT: vmov r4, s9
|
||||
; CHECK-NEXT: vmov.32 q0[0], r3
|
||||
; CHECK-NEXT: vmov r5, s29
|
||||
; CHECK-NEXT: vmov.32 q0[1], r2
|
||||
; CHECK-NEXT: vmov.32 q0[2], r0
|
||||
; CHECK-NEXT: vmov r0, s12
|
||||
; CHECK-NEXT: vmov r3, s13
|
||||
; CHECK-NEXT: vmov.32 q0[3], r8
|
||||
; CHECK-NEXT: vmov r2, s5
|
||||
; CHECK-NEXT: vstrw.32 q0, [r1, #16]
|
||||
; CHECK-NEXT: adds r6, r6, r7
|
||||
; CHECK-NEXT: vmov r7, s27
|
||||
; CHECK-NEXT: adcs r4, r5
|
||||
; CHECK-NEXT: vmov r5, s11
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adc.w r2, r7, r5
|
||||
; CHECK-NEXT: vmov r5, s4
|
||||
; CHECK-NEXT: adds r0, r0, r5
|
||||
; CHECK-NEXT: adcs r2, r3
|
||||
; CHECK-NEXT: adds r0, r0, r6
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, lr
|
||||
; CHECK-NEXT: adc.w r0, r4, r2
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r0, r12
|
||||
; CHECK-NEXT: adcs r2, r4
|
||||
; CHECK-NEXT: vmov.32 q0[0], r0
|
||||
; CHECK-NEXT: vmov.32 q0[1], r2
|
||||
; CHECK-NEXT: vmov.32 q0[2], lr
|
||||
; CHECK-NEXT: vmov.32 q0[3], r12
|
||||
; CHECK-NEXT: vstrw.32 q0, [r1]
|
||||
; CHECK-NEXT: add sp, #64
|
||||
; CHECK-NEXT: add sp, #72
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc}
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
|
||||
entry:
|
||||
%l1 = load <16 x i64>, <16 x i64>* %src, align 4
|
||||
%s1 = shufflevector <16 x i64> %l1, <16 x i64> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
|
||||
|
|
|
@ -6,11 +6,13 @@ define arm_aapcs_vfpcc <2 x i32> @vmulhs_v2i32(<2 x i32> %s0, <2 x i32> %s1) {
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmullb.s32 q2, q0, q1
|
||||
; CHECK-NEXT: vmov r0, s9
|
||||
; CHECK-NEXT: vmov r1, s11
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q0[0], r0
|
||||
; CHECK-NEXT: asrs r0, r0, #31
|
||||
; CHECK-NEXT: asrs r1, r1, #31
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q0[1], r0
|
||||
; CHECK-NEXT: vmov r0, s11
|
||||
; CHECK-NEXT: vmov.32 q0[2], r0
|
||||
; CHECK-NEXT: asrs r0, r0, #31
|
||||
; CHECK-NEXT: vmov.32 q0[3], r0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%s0s = sext <2 x i32> %s0 to <2 x i64>
|
||||
|
@ -46,28 +48,28 @@ entry:
|
|||
define arm_aapcs_vfpcc <4 x i32> @vmulhs_v4i32(<4 x i32> %s0, <4 x i32> %s1) {
|
||||
; CHECK-LABEL: vmulhs_v4i32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vmov.f32 s8, s4
|
||||
; CHECK-NEXT: vmov.f32 s12, s0
|
||||
; CHECK-NEXT: vmov.f32 s10, s5
|
||||
; CHECK-NEXT: vmov.f32 s14, s1
|
||||
; CHECK-NEXT: vmov r0, s8
|
||||
; CHECK-NEXT: vmov r1, s12
|
||||
; CHECK-NEXT: vmov.f32 s16, s6
|
||||
; CHECK-NEXT: vmov.f32 s18, s7
|
||||
; CHECK-NEXT: vmov.f32 s10, s5
|
||||
; CHECK-NEXT: vmov r2, s12
|
||||
; CHECK-NEXT: vmov r1, s14
|
||||
; CHECK-NEXT: vmov r0, s10
|
||||
; CHECK-NEXT: vmov.f32 s12, s6
|
||||
; CHECK-NEXT: vmov.f32 s14, s7
|
||||
; CHECK-NEXT: vmov.f32 s4, s2
|
||||
; CHECK-NEXT: vmov.f32 s6, s3
|
||||
; CHECK-NEXT: vmullb.s32 q5, q1, q4
|
||||
; CHECK-NEXT: vmullb.s32 q0, q1, q3
|
||||
; CHECK-NEXT: smmul r0, r1, r0
|
||||
; CHECK-NEXT: vmov r1, s21
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
|
||||
; CHECK-NEXT: vmov r0, s10
|
||||
; CHECK-NEXT: vmov r1, s14
|
||||
; CHECK-NEXT: smmul r0, r1, r0
|
||||
; CHECK-NEXT: vmov r1, s23
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11}
|
||||
; CHECK-NEXT: vmov r1, s8
|
||||
; CHECK-NEXT: smmul r1, r2, r1
|
||||
; CHECK-NEXT: vmov.32 q2[0], r1
|
||||
; CHECK-NEXT: vmov.32 q2[1], r0
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: vmov.32 q2[2], r0
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmov.32 q2[3], r0
|
||||
; CHECK-NEXT: vmov q0, q2
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%s0s = sext <4 x i32> %s0 to <4 x i64>
|
||||
|
@ -140,18 +142,21 @@ define arm_aapcs_vfpcc <8 x i16> @vmulhs_v8i16(<8 x i16> %s0, <8 x i16> %s1) {
|
|||
; CHECK-LABEL: vmulhs_v8i16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[0]
|
||||
; CHECK-NEXT: vmov.u16 r1, q1[2]
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q2[0], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[1]
|
||||
; CHECK-NEXT: vmov.u16 r1, q1[3]
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q2[1], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[2]
|
||||
; CHECK-NEXT: vmov.32 q2[2], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[3]
|
||||
; CHECK-NEXT: vmov.32 q2[3], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[0]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[2]
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q3[0], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[1]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[3]
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r1, r0
|
||||
; CHECK-NEXT: vmov.u16 r1, q1[6]
|
||||
; CHECK-NEXT: vmov.32 q3[1], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[2]
|
||||
; CHECK-NEXT: vmov.32 q3[2], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[3]
|
||||
; CHECK-NEXT: vmov.32 q3[3], r0
|
||||
; CHECK-NEXT: vmullb.s16 q2, q3, q2
|
||||
; CHECK-NEXT: vshr.s32 q3, q2, #16
|
||||
; CHECK-NEXT: vmov r0, s12
|
||||
|
@ -163,16 +168,21 @@ define arm_aapcs_vfpcc <8 x i16> @vmulhs_v8i16(<8 x i16> %s0, <8 x i16> %s1) {
|
|||
; CHECK-NEXT: vmov r0, s15
|
||||
; CHECK-NEXT: vmov.16 q2[3], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[4]
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q3[0], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[5]
|
||||
; CHECK-NEXT: vmov.u16 r1, q1[7]
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q3[1], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[6]
|
||||
; CHECK-NEXT: vmov.32 q3[2], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[7]
|
||||
; CHECK-NEXT: vmov.32 q3[3], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[4]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[6]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q1[0], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[5]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[7]
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q1[1], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[6]
|
||||
; CHECK-NEXT: vmov.32 q1[2], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[7]
|
||||
; CHECK-NEXT: vmov.32 q1[3], r0
|
||||
; CHECK-NEXT: vmullb.s16 q0, q1, q3
|
||||
; CHECK-NEXT: vshr.s32 q0, q0, #16
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
|
@ -198,18 +208,21 @@ define arm_aapcs_vfpcc <8 x i16> @vmulhu_v8i16(<8 x i16> %s0, <8 x i16> %s1) {
|
|||
; CHECK-LABEL: vmulhu_v8i16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[0]
|
||||
; CHECK-NEXT: vmov.u16 r1, q1[2]
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q2[0], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[1]
|
||||
; CHECK-NEXT: vmov.u16 r1, q1[3]
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q2[1], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[2]
|
||||
; CHECK-NEXT: vmov.32 q2[2], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[3]
|
||||
; CHECK-NEXT: vmov.32 q2[3], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[0]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[2]
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q3[0], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[1]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[3]
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r1, r0
|
||||
; CHECK-NEXT: vmov.u16 r1, q1[6]
|
||||
; CHECK-NEXT: vmov.32 q3[1], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[2]
|
||||
; CHECK-NEXT: vmov.32 q3[2], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[3]
|
||||
; CHECK-NEXT: vmov.32 q3[3], r0
|
||||
; CHECK-NEXT: vmullb.u16 q2, q3, q2
|
||||
; CHECK-NEXT: vshr.u32 q3, q2, #16
|
||||
; CHECK-NEXT: vmov r0, s12
|
||||
|
@ -221,16 +234,21 @@ define arm_aapcs_vfpcc <8 x i16> @vmulhu_v8i16(<8 x i16> %s0, <8 x i16> %s1) {
|
|||
; CHECK-NEXT: vmov r0, s15
|
||||
; CHECK-NEXT: vmov.16 q2[3], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[4]
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q3[0], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[5]
|
||||
; CHECK-NEXT: vmov.u16 r1, q1[7]
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q3[1], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[6]
|
||||
; CHECK-NEXT: vmov.32 q3[2], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[7]
|
||||
; CHECK-NEXT: vmov.32 q3[3], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[4]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[6]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q1[0], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[5]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[7]
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q1[1], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[6]
|
||||
; CHECK-NEXT: vmov.32 q1[2], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[7]
|
||||
; CHECK-NEXT: vmov.32 q1[3], r0
|
||||
; CHECK-NEXT: vmullb.u16 q0, q1, q3
|
||||
; CHECK-NEXT: vshr.u32 q0, q0, #16
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
|
|
|
@ -4,33 +4,38 @@
|
|||
define arm_aapcs_vfpcc void @test32(i32* noalias nocapture readonly %x, i32* noalias nocapture readonly %y, i32* nocapture %z, i32 %n) {
|
||||
; CHECK-LABEL: test32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: .save {r5, lr}
|
||||
; CHECK-NEXT: push {r5, lr}
|
||||
; CHECK-NEXT: cmp r3, #1
|
||||
; CHECK-NEXT: blt .LBB0_2
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: poplt {r5, pc}
|
||||
; CHECK-NEXT: .LBB0_1: @ %vector.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0], #16
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r1], #16
|
||||
; CHECK-NEXT: subs r3, #4
|
||||
; CHECK-NEXT: vmullt.s32 q0, q2, q1
|
||||
; CHECK-NEXT: vmullb.s32 q3, q2, q1
|
||||
; CHECK-NEXT: vmov r5, s1
|
||||
; CHECK-NEXT: vmov r12, s0
|
||||
; CHECK-NEXT: vmov r7, s3
|
||||
; CHECK-NEXT: lsrl r12, r5, #31
|
||||
; CHECK-NEXT: vmov r4, s2
|
||||
; CHECK-NEXT: lsrl r4, r7, #31
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r4, r12
|
||||
; CHECK-NEXT: vmov r12, s12
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r7, r5
|
||||
; CHECK-NEXT: vmullt.s32 q3, q2, q1
|
||||
; CHECK-NEXT: vmov r5, s13
|
||||
; CHECK-NEXT: vmov r7, s15
|
||||
; CHECK-NEXT: vmov r12, s12
|
||||
; CHECK-NEXT: lsrl r12, r5, #31
|
||||
; CHECK-NEXT: vmov r4, s14
|
||||
; CHECK-NEXT: lsrl r4, r7, #31
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r4, r12
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r7, r5
|
||||
; CHECK-NEXT: vmov.32 q0[0], r12
|
||||
; CHECK-NEXT: vmov r12, s14
|
||||
; CHECK-NEXT: vmov.32 q0[1], r5
|
||||
; CHECK-NEXT: vmov r5, s15
|
||||
; CHECK-NEXT: lsrl r12, r5, #31
|
||||
; CHECK-NEXT: vmullb.s32 q3, q2, q1
|
||||
; CHECK-NEXT: vmov.32 q0[2], r12
|
||||
; CHECK-NEXT: vmov r12, s12
|
||||
; CHECK-NEXT: vmov.32 q0[3], r5
|
||||
; CHECK-NEXT: vmov r5, s13
|
||||
; CHECK-NEXT: lsrl r12, r5, #31
|
||||
; CHECK-NEXT: vmov.32 q1[0], r12
|
||||
; CHECK-NEXT: vmov r12, s14
|
||||
; CHECK-NEXT: vmov.32 q1[1], r5
|
||||
; CHECK-NEXT: vmov r5, s15
|
||||
; CHECK-NEXT: lsrl r12, r5, #31
|
||||
; CHECK-NEXT: vmov.32 q1[2], r12
|
||||
; CHECK-NEXT: vmov.32 q1[3], r5
|
||||
; CHECK-NEXT: vmov.f32 s8, s6
|
||||
; CHECK-NEXT: vmov.f32 s9, s7
|
||||
; CHECK-NEXT: vmov.f32 s6, s0
|
||||
|
@ -42,8 +47,8 @@ define arm_aapcs_vfpcc void @test32(i32* noalias nocapture readonly %x, i32* noa
|
|||
; CHECK-NEXT: vmov.f32 s7, s10
|
||||
; CHECK-NEXT: vstrb.8 q1, [r2], #16
|
||||
; CHECK-NEXT: bne .LBB0_1
|
||||
; CHECK-NEXT: .LBB0_2: @ %for.cond.cleanup
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
|
||||
; CHECK-NEXT: pop {r5, pc}
|
||||
entry:
|
||||
%0 = and i32 %n, 3
|
||||
%cmp = icmp eq i32 %0, 0
|
||||
|
|
|
@ -74,18 +74,21 @@ define arm_aapcs_vfpcc <8 x i16> @vqdmulh_i16_c(<8 x i16> %s0, <8 x i16> %s1) {
|
|||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: vmov q2, q0
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[0]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[2]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q0[0], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q2[1]
|
||||
; CHECK-NEXT: vmov.u16 r1, q2[3]
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q0[1], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q2[2]
|
||||
; CHECK-NEXT: vmov.32 q0[2], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q2[3]
|
||||
; CHECK-NEXT: vmov.32 q0[3], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[0]
|
||||
; CHECK-NEXT: vmov.u16 r1, q1[2]
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q3[0], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[1]
|
||||
; CHECK-NEXT: vmov.u16 r1, q1[3]
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r1, r0
|
||||
; CHECK-NEXT: vmov.u16 r1, q2[6]
|
||||
; CHECK-NEXT: vmov.32 q3[1], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[2]
|
||||
; CHECK-NEXT: vmov.32 q3[2], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[3]
|
||||
; CHECK-NEXT: vmov.32 q3[3], r0
|
||||
; CHECK-NEXT: vmullb.s16 q0, q3, q0
|
||||
; CHECK-NEXT: vmov.i32 q3, #0x7fff
|
||||
; CHECK-NEXT: vshl.i32 q0, q0, #10
|
||||
|
@ -101,16 +104,21 @@ define arm_aapcs_vfpcc <8 x i16> @vqdmulh_i16_c(<8 x i16> %s0, <8 x i16> %s1) {
|
|||
; CHECK-NEXT: vmov r0, s19
|
||||
; CHECK-NEXT: vmov.16 q0[3], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q2[4]
|
||||
; CHECK-NEXT: vmov q4[2], q4[0], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q4[0], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q2[5]
|
||||
; CHECK-NEXT: vmov.u16 r1, q2[7]
|
||||
; CHECK-NEXT: vmov q4[3], q4[1], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q4[1], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q2[6]
|
||||
; CHECK-NEXT: vmov.32 q4[2], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q2[7]
|
||||
; CHECK-NEXT: vmov.32 q4[3], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[4]
|
||||
; CHECK-NEXT: vmov.u16 r1, q1[6]
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q2[0], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[5]
|
||||
; CHECK-NEXT: vmov.u16 r1, q1[7]
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
|
||||
; CHECK-NEXT: vmov.32 q2[1], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[6]
|
||||
; CHECK-NEXT: vmov.32 q2[2], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[7]
|
||||
; CHECK-NEXT: vmov.32 q2[3], r0
|
||||
; CHECK-NEXT: vmullb.s16 q1, q2, q4
|
||||
; CHECK-NEXT: vshl.i32 q1, q1, #10
|
||||
; CHECK-NEXT: vshr.s32 q1, q1, #10
|
||||
|
|
|
@ -165,28 +165,30 @@ define arm_aapcs_vfpcc <2 x i64> @vqmovni64_smaxmin(<2 x i64> %s0) {
|
|||
; CHECK-LABEL: vqmovni64_smaxmin:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: mvn r12, #-2147483648
|
||||
; CHECK-NEXT: mvn r3, #-2147483648
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: movs r0, #0
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: subs.w r2, r2, r12
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: subs r2, r2, r3
|
||||
; CHECK-NEXT: sbcs r1, r1, #0
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: mov.w r1, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r1, #1
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov.32 q1[0], r1
|
||||
; CHECK-NEXT: vmov.32 q1[1], r1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: subs r2, r2, r3
|
||||
; CHECK-NEXT: mov.w r3, #-1
|
||||
; CHECK-NEXT: sbcs r1, r1, #0
|
||||
; CHECK-NEXT: mov.w r1, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r1, #1
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: subs.w r3, r3, r12
|
||||
; CHECK-NEXT: mov.w r12, #-1
|
||||
; CHECK-NEXT: sbcs r2, r2, #0
|
||||
; CHECK-NEXT: mov.w r2, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r2, #1
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: csetm r2, ne
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r2, r1
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r2, r1
|
||||
; CHECK-NEXT: vmov.32 q1[2], r1
|
||||
; CHECK-NEXT: vmov.32 q1[3], r1
|
||||
; CHECK-NEXT: adr r1, .LCPI12_0
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r1]
|
||||
; CHECK-NEXT: vand q0, q0, q1
|
||||
|
@ -194,23 +196,25 @@ define arm_aapcs_vfpcc <2 x i64> @vqmovni64_smaxmin(<2 x i64> %s0) {
|
|||
; CHECK-NEXT: vorr q0, q0, q2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: rsbs.w r2, r2, #-2147483648
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: sbcs.w r1, r12, r1
|
||||
; CHECK-NEXT: sbcs.w r1, r3, r1
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: mov.w r1, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r1, #1
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: rsbs.w r3, r3, #-2147483648
|
||||
; CHECK-NEXT: sbcs.w r2, r12, r2
|
||||
; CHECK-NEXT: vmov.32 q1[0], r1
|
||||
; CHECK-NEXT: vmov.32 q1[1], r1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: rsbs.w r2, r2, #-2147483648
|
||||
; CHECK-NEXT: sbcs.w r1, r3, r1
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r0, #1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r0, r1
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r0, r1
|
||||
; CHECK-NEXT: vmov.32 q1[2], r0
|
||||
; CHECK-NEXT: vmov.32 q1[3], r0
|
||||
; CHECK-NEXT: adr r0, .LCPI12_1
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0]
|
||||
; CHECK-NEXT: vand q0, q0, q1
|
||||
|
@ -241,28 +245,30 @@ define arm_aapcs_vfpcc <2 x i64> @vqmovni64_sminmax(<2 x i64> %s0) {
|
|||
; CHECK-LABEL: vqmovni64_sminmax:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: mov.w r12, #-1
|
||||
; CHECK-NEXT: mov.w r3, #-1
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: movs r0, #0
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: rsbs.w r2, r2, #-2147483648
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: sbcs.w r1, r12, r1
|
||||
; CHECK-NEXT: sbcs.w r1, r3, r1
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: mov.w r1, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r1, #1
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: rsbs.w r3, r3, #-2147483648
|
||||
; CHECK-NEXT: sbcs.w r2, r12, r2
|
||||
; CHECK-NEXT: mvn r12, #-2147483648
|
||||
; CHECK-NEXT: mov.w r2, #0
|
||||
; CHECK-NEXT: vmov.32 q1[0], r1
|
||||
; CHECK-NEXT: vmov.32 q1[1], r1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: rsbs.w r2, r2, #-2147483648
|
||||
; CHECK-NEXT: sbcs.w r1, r3, r1
|
||||
; CHECK-NEXT: mvn r3, #-2147483648
|
||||
; CHECK-NEXT: mov.w r1, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r2, #1
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: csetm r2, ne
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r2, r1
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r2, r1
|
||||
; CHECK-NEXT: movlt r1, #1
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov.32 q1[2], r1
|
||||
; CHECK-NEXT: vmov.32 q1[3], r1
|
||||
; CHECK-NEXT: adr r1, .LCPI13_0
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r1]
|
||||
; CHECK-NEXT: vand q0, q0, q1
|
||||
|
@ -270,23 +276,25 @@ define arm_aapcs_vfpcc <2 x i64> @vqmovni64_sminmax(<2 x i64> %s0) {
|
|||
; CHECK-NEXT: vorr q0, q0, q2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: subs.w r2, r2, r12
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: subs r2, r2, r3
|
||||
; CHECK-NEXT: sbcs r1, r1, #0
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: mov.w r1, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r1, #1
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: subs.w r3, r3, r12
|
||||
; CHECK-NEXT: sbcs r2, r2, #0
|
||||
; CHECK-NEXT: vmov.32 q1[0], r1
|
||||
; CHECK-NEXT: vmov.32 q1[1], r1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: subs r2, r2, r3
|
||||
; CHECK-NEXT: sbcs r1, r1, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r0, #1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r0, r1
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r0, r1
|
||||
; CHECK-NEXT: vmov.32 q1[2], r0
|
||||
; CHECK-NEXT: vmov.32 q1[3], r0
|
||||
; CHECK-NEXT: adr r0, .LCPI13_1
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0]
|
||||
; CHECK-NEXT: vand q0, q0, q1
|
||||
|
@ -320,23 +328,25 @@ define arm_aapcs_vfpcc <2 x i64> @vqmovni64_umaxmin(<2 x i64> %s0) {
|
|||
; CHECK-NEXT: movs r2, #0
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: vmov.i64 q2, #0xffffffff
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: subs.w r1, r1, #-1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: sbcs r0, r0, #0
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: mov.w r0, #0
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r0, #1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: subs.w r3, r3, #-1
|
||||
; CHECK-NEXT: sbcs r1, r1, #0
|
||||
; CHECK-NEXT: vmov.32 q1[0], r0
|
||||
; CHECK-NEXT: vmov.32 q1[1], r0
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: subs.w r1, r1, #-1
|
||||
; CHECK-NEXT: sbcs r0, r0, #0
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r2, #1
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q1[2], r0
|
||||
; CHECK-NEXT: vmov.32 q1[3], r0
|
||||
; CHECK-NEXT: vbic q2, q2, q1
|
||||
; CHECK-NEXT: vand q0, q0, q1
|
||||
; CHECK-NEXT: vorr q0, q0, q2
|
||||
|
@ -354,23 +364,25 @@ define arm_aapcs_vfpcc <2 x i64> @vqmovni64_uminmax(<2 x i64> %s0) {
|
|||
; CHECK-NEXT: movs r2, #0
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: vmov.i64 q2, #0xffffffff
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: subs.w r1, r1, #-1
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: sbcs r0, r0, #0
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: mov.w r0, #0
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r0, #1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: subs.w r3, r3, #-1
|
||||
; CHECK-NEXT: sbcs r1, r1, #0
|
||||
; CHECK-NEXT: vmov.32 q1[0], r0
|
||||
; CHECK-NEXT: vmov.32 q1[1], r0
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: subs.w r1, r1, #-1
|
||||
; CHECK-NEXT: sbcs r0, r0, #0
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r2, #1
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q1[2], r0
|
||||
; CHECK-NEXT: vmov.32 q1[3], r0
|
||||
; CHECK-NEXT: vbic q2, q2, q1
|
||||
; CHECK-NEXT: vand q0, q0, q1
|
||||
; CHECK-NEXT: vorr q0, q0, q2
|
||||
|
|
|
@ -180,65 +180,71 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @vqshrni64_smaxmin(<2 x i64> %so) {
|
||||
; CHECK-LABEL: vqshrni64_smaxmin:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: mvn r12, #-2147483648
|
||||
; CHECK-NEXT: .save {r5, lr}
|
||||
; CHECK-NEXT: push {r5, lr}
|
||||
; CHECK-NEXT: vmov r5, s1
|
||||
; CHECK-NEXT: mvn lr, #-2147483648
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: mov.w lr, #0
|
||||
; CHECK-NEXT: asrl r2, r1, #3
|
||||
; CHECK-NEXT: vmov r4, s2
|
||||
; CHECK-NEXT: subs.w r3, r2, r12
|
||||
; CHECK-NEXT: sbcs r3, r1, #0
|
||||
; CHECK-NEXT: mov.w r3, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r3, #1
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: asrl r2, r5, #3
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: csetm r5, ne
|
||||
; CHECK-NEXT: asrl r4, r3, #3
|
||||
; CHECK-NEXT: subs.w r0, r4, r12
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r4, r2
|
||||
; CHECK-NEXT: sbcs r0, r3, #0
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r3, r1
|
||||
; CHECK-NEXT: subs.w r0, r2, lr
|
||||
; CHECK-NEXT: vmov.32 q2[0], r2
|
||||
; CHECK-NEXT: sbcs r0, r5, #0
|
||||
; CHECK-NEXT: vmov.32 q2[1], r5
|
||||
; CHECK-NEXT: mov.w r0, #0
|
||||
; CHECK-NEXT: mov.w r2, #-1
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r0, #1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, r5
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r0, r5
|
||||
; CHECK-NEXT: adr r0, .LCPI12_0
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
||||
; CHECK-NEXT: vbic q1, q1, q0
|
||||
; CHECK-NEXT: vand q0, q2, q0
|
||||
; CHECK-NEXT: vorr q0, q0, q1
|
||||
; CHECK-NEXT: vmov.32 q1[0], r0
|
||||
; CHECK-NEXT: vmov.32 q1[1], r0
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: asrl r0, r3, #3
|
||||
; CHECK-NEXT: subs.w r1, r0, lr
|
||||
; CHECK-NEXT: vmov.32 q2[2], r0
|
||||
; CHECK-NEXT: sbcs r1, r3, #0
|
||||
; CHECK-NEXT: vmov.32 q2[3], r3
|
||||
; CHECK-NEXT: mov.w r1, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r1, #1
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov.32 q1[2], r1
|
||||
; CHECK-NEXT: vmov.32 q1[3], r1
|
||||
; CHECK-NEXT: adr r1, .LCPI12_0
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r1]
|
||||
; CHECK-NEXT: vbic q0, q0, q1
|
||||
; CHECK-NEXT: vand q1, q2, q1
|
||||
; CHECK-NEXT: vorr q0, q1, q0
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: rsbs.w r1, r1, #-2147483648
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: sbcs.w r0, r2, r0
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: mov.w r0, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r0, #1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: rsbs.w r3, r3, #-2147483648
|
||||
; CHECK-NEXT: sbcs.w r1, r2, r1
|
||||
; CHECK-NEXT: vmov.32 q1[0], r0
|
||||
; CHECK-NEXT: vmov.32 q1[1], r0
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: rsbs.w r1, r1, #-2147483648
|
||||
; CHECK-NEXT: sbcs.w r0, r2, r0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt.w lr, #1
|
||||
; CHECK-NEXT: cmp.w lr, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
|
||||
; CHECK-NEXT: movlt.w r12, #1
|
||||
; CHECK-NEXT: cmp.w r12, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q1[2], r0
|
||||
; CHECK-NEXT: vmov.32 q1[3], r0
|
||||
; CHECK-NEXT: adr r0, .LCPI12_1
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0]
|
||||
; CHECK-NEXT: vand q0, q0, q1
|
||||
; CHECK-NEXT: vbic q2, q2, q1
|
||||
; CHECK-NEXT: vorr q0, q0, q2
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
; CHECK-NEXT: pop {r5, pc}
|
||||
; CHECK-NEXT: .p2align 4
|
||||
; CHECK-NEXT: @ %bb.1:
|
||||
; CHECK-NEXT: .LCPI12_0:
|
||||
|
@ -263,65 +269,71 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @vqshrni64_sminmax(<2 x i64> %so) {
|
||||
; CHECK-LABEL: vqshrni64_sminmax:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: .save {r4, lr}
|
||||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: vmov r1, s1
|
||||
; CHECK-NEXT: mov.w r12, #-1
|
||||
; CHECK-NEXT: mov.w lr, #-1
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: mov.w lr, #0
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: asrl r2, r1, #3
|
||||
; CHECK-NEXT: vmov r4, s2
|
||||
; CHECK-NEXT: rsbs.w r3, r2, #-2147483648
|
||||
; CHECK-NEXT: sbcs.w r3, r12, r1
|
||||
; CHECK-NEXT: mov.w r3, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r3, #1
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: asrl r4, r3, #3
|
||||
; CHECK-NEXT: rsbs.w r5, r4, #-2147483648
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r4, r2
|
||||
; CHECK-NEXT: sbcs.w r5, r12, r3
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r3, r1
|
||||
; CHECK-NEXT: mov.w r5, #0
|
||||
; CHECK-NEXT: rsbs.w r0, r2, #-2147483648
|
||||
; CHECK-NEXT: vmov.32 q2[0], r2
|
||||
; CHECK-NEXT: sbcs.w r0, lr, r1
|
||||
; CHECK-NEXT: vmov.32 q2[1], r1
|
||||
; CHECK-NEXT: mov.w r0, #0
|
||||
; CHECK-NEXT: mvn r2, #-2147483648
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r5, #1
|
||||
; CHECK-NEXT: cmp r5, #0
|
||||
; CHECK-NEXT: csetm r5, ne
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r5, r0
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r5, r0
|
||||
; CHECK-NEXT: adr r0, .LCPI13_0
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
||||
; CHECK-NEXT: vbic q1, q1, q0
|
||||
; CHECK-NEXT: vand q0, q2, q0
|
||||
; CHECK-NEXT: vorr q0, q0, q1
|
||||
; CHECK-NEXT: movlt r0, #1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q1[0], r0
|
||||
; CHECK-NEXT: vmov.32 q1[1], r0
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: asrl r0, r3, #3
|
||||
; CHECK-NEXT: rsbs.w r4, r0, #-2147483648
|
||||
; CHECK-NEXT: vmov.32 q2[2], r0
|
||||
; CHECK-NEXT: sbcs.w r4, lr, r3
|
||||
; CHECK-NEXT: vmov.32 q2[3], r3
|
||||
; CHECK-NEXT: mov.w r4, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r4, #1
|
||||
; CHECK-NEXT: cmp r4, #0
|
||||
; CHECK-NEXT: csetm r4, ne
|
||||
; CHECK-NEXT: vmov.32 q1[2], r4
|
||||
; CHECK-NEXT: vmov.32 q1[3], r4
|
||||
; CHECK-NEXT: adr r4, .LCPI13_0
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r4]
|
||||
; CHECK-NEXT: vbic q0, q0, q1
|
||||
; CHECK-NEXT: vand q1, q2, q1
|
||||
; CHECK-NEXT: vorr q0, q1, q0
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: subs r1, r1, r2
|
||||
; CHECK-NEXT: vmov r1, s3
|
||||
; CHECK-NEXT: sbcs r0, r0, #0
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: mov.w r0, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt r0, #1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: subs r2, r3, r2
|
||||
; CHECK-NEXT: sbcs r1, r1, #0
|
||||
; CHECK-NEXT: vmov.32 q1[0], r0
|
||||
; CHECK-NEXT: vmov.32 q1[1], r0
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: subs r1, r1, r2
|
||||
; CHECK-NEXT: sbcs r0, r0, #0
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: movlt.w lr, #1
|
||||
; CHECK-NEXT: cmp.w lr, #0
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
|
||||
; CHECK-NEXT: movlt.w r12, #1
|
||||
; CHECK-NEXT: cmp.w r12, #0
|
||||
; CHECK-NEXT: csetm r0, ne
|
||||
; CHECK-NEXT: vmov.32 q1[2], r0
|
||||
; CHECK-NEXT: vmov.32 q1[3], r0
|
||||
; CHECK-NEXT: adr r0, .LCPI13_1
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0]
|
||||
; CHECK-NEXT: vand q0, q0, q1
|
||||
; CHECK-NEXT: vbic q2, q2, q1
|
||||
; CHECK-NEXT: vorr q0, q0, q2
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
; CHECK-NEXT: pop {r4, pc}
|
||||
; CHECK-NEXT: .p2align 4
|
||||
; CHECK-NEXT: @ %bb.1:
|
||||
; CHECK-NEXT: .LCPI13_0:
|
||||
|
@ -346,37 +358,41 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @vqshrni64_umaxmin(<2 x i64> %so) {
|
||||
; CHECK-LABEL: vqshrni64_umaxmin:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vmov r5, s1
|
||||
; CHECK-NEXT: movs r2, #0
|
||||
; CHECK-NEXT: .save {r5, r6, r7, lr}
|
||||
; CHECK-NEXT: push {r5, r6, r7, lr}
|
||||
; CHECK-NEXT: vmov r7, s1
|
||||
; CHECK-NEXT: movs r1, #0
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: vmov.i64 q1, #0xffffffff
|
||||
; CHECK-NEXT: lsrl r0, r5, #3
|
||||
; CHECK-NEXT: vmov r4, s2
|
||||
; CHECK-NEXT: subs.w r3, r0, #-1
|
||||
; CHECK-NEXT: sbcs r3, r5, #0
|
||||
; CHECK-NEXT: mov.w r3, #0
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r3, #1
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: lsrl r0, r7, #3
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: csetm r12, ne
|
||||
; CHECK-NEXT: lsrl r4, r3, #3
|
||||
; CHECK-NEXT: subs.w r1, r4, #-1
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r4, r0
|
||||
; CHECK-NEXT: sbcs r1, r3, #0
|
||||
; CHECK-NEXT: subs.w r2, r0, #-1
|
||||
; CHECK-NEXT: vmov.32 q2[0], r0
|
||||
; CHECK-NEXT: sbcs r2, r7, #0
|
||||
; CHECK-NEXT: vmov.32 q2[1], r7
|
||||
; CHECK-NEXT: mov.w r2, #0
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r2, #1
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r3, r5
|
||||
; CHECK-NEXT: csetm r2, ne
|
||||
; CHECK-NEXT: vmov.32 q1[0], r2
|
||||
; CHECK-NEXT: vmov.32 q1[1], r2
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: lsrl r2, r3, #3
|
||||
; CHECK-NEXT: vmov.i64 q0, #0xffffffff
|
||||
; CHECK-NEXT: subs.w r5, r2, #-1
|
||||
; CHECK-NEXT: vmov.32 q2[2], r2
|
||||
; CHECK-NEXT: sbcs r5, r3, #0
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r1, #1
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: vmov.32 q2[3], r3
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r1, r12
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r12
|
||||
; CHECK-NEXT: vbic q1, q1, q0
|
||||
; CHECK-NEXT: vand q0, q2, q0
|
||||
; CHECK-NEXT: vorr q0, q0, q1
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
; CHECK-NEXT: vmov.32 q1[2], r1
|
||||
; CHECK-NEXT: vmov.32 q1[3], r1
|
||||
; CHECK-NEXT: vbic q0, q0, q1
|
||||
; CHECK-NEXT: vand q1, q2, q1
|
||||
; CHECK-NEXT: vorr q0, q1, q0
|
||||
; CHECK-NEXT: pop {r5, r6, r7, pc}
|
||||
entry:
|
||||
%s0 = lshr <2 x i64> %so, <i64 3, i64 3>
|
||||
%c1 = icmp ult <2 x i64> %s0, <i64 4294967295, i64 4294967295>
|
||||
|
@ -387,37 +403,41 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @vqshrni64_uminmax(<2 x i64> %so) {
|
||||
; CHECK-LABEL: vqshrni64_uminmax:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vmov r5, s1
|
||||
; CHECK-NEXT: movs r2, #0
|
||||
; CHECK-NEXT: .save {r5, r6, r7, lr}
|
||||
; CHECK-NEXT: push {r5, r6, r7, lr}
|
||||
; CHECK-NEXT: vmov r7, s1
|
||||
; CHECK-NEXT: movs r1, #0
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: vmov.i64 q1, #0xffffffff
|
||||
; CHECK-NEXT: lsrl r0, r5, #3
|
||||
; CHECK-NEXT: vmov r4, s2
|
||||
; CHECK-NEXT: subs.w r3, r0, #-1
|
||||
; CHECK-NEXT: sbcs r3, r5, #0
|
||||
; CHECK-NEXT: mov.w r3, #0
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r3, #1
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: lsrl r0, r7, #3
|
||||
; CHECK-NEXT: vmov r3, s3
|
||||
; CHECK-NEXT: csetm r12, ne
|
||||
; CHECK-NEXT: lsrl r4, r3, #3
|
||||
; CHECK-NEXT: subs.w r1, r4, #-1
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r4, r0
|
||||
; CHECK-NEXT: sbcs r1, r3, #0
|
||||
; CHECK-NEXT: subs.w r2, r0, #-1
|
||||
; CHECK-NEXT: vmov.32 q2[0], r0
|
||||
; CHECK-NEXT: sbcs r2, r7, #0
|
||||
; CHECK-NEXT: vmov.32 q2[1], r7
|
||||
; CHECK-NEXT: mov.w r2, #0
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r2, #1
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r3, r5
|
||||
; CHECK-NEXT: csetm r2, ne
|
||||
; CHECK-NEXT: vmov.32 q1[0], r2
|
||||
; CHECK-NEXT: vmov.32 q1[1], r2
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: lsrl r2, r3, #3
|
||||
; CHECK-NEXT: vmov.i64 q0, #0xffffffff
|
||||
; CHECK-NEXT: subs.w r5, r2, #-1
|
||||
; CHECK-NEXT: vmov.32 q2[2], r2
|
||||
; CHECK-NEXT: sbcs r5, r3, #0
|
||||
; CHECK-NEXT: it lo
|
||||
; CHECK-NEXT: movlo r1, #1
|
||||
; CHECK-NEXT: cmp r1, #0
|
||||
; CHECK-NEXT: vmov.32 q2[3], r3
|
||||
; CHECK-NEXT: csetm r1, ne
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r1, r12
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r12
|
||||
; CHECK-NEXT: vbic q1, q1, q0
|
||||
; CHECK-NEXT: vand q0, q2, q0
|
||||
; CHECK-NEXT: vorr q0, q0, q1
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
; CHECK-NEXT: vmov.32 q1[2], r1
|
||||
; CHECK-NEXT: vmov.32 q1[3], r1
|
||||
; CHECK-NEXT: vbic q0, q0, q1
|
||||
; CHECK-NEXT: vand q1, q2, q1
|
||||
; CHECK-NEXT: vorr q0, q1, q0
|
||||
; CHECK-NEXT: pop {r5, r6, r7, pc}
|
||||
entry:
|
||||
%s0 = lshr <2 x i64> %so, <i64 3, i64 3>
|
||||
%c2 = icmp ult <2 x i64> %s0, <i64 4294967295, i64 4294967295>
|
||||
|
|
|
@ -6,19 +6,21 @@
|
|||
define void @vst2_v2i32(<2 x i32> *%src, <4 x i32> *%dst) {
|
||||
; CHECK-LABEL: vst2_v2i32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: ldrd r12, r3, [r0]
|
||||
; CHECK-NEXT: ldrd r2, r0, [r0, #8]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r3, r12
|
||||
; CHECK-NEXT: vmov.f64 d2, d1
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r0, r2
|
||||
; CHECK-NEXT: vmov.f32 s5, s3
|
||||
; CHECK-NEXT: vmov.f32 s2, s8
|
||||
; CHECK-NEXT: vmov.f32 s3, s9
|
||||
; CHECK-NEXT: vmov.f32 s6, s10
|
||||
; CHECK-NEXT: vmov.f32 s1, s2
|
||||
; CHECK-NEXT: vmov.f32 s7, s11
|
||||
; CHECK-NEXT: ldrd r2, r12, [r0]
|
||||
; CHECK-NEXT: ldrd r3, r0, [r0, #8]
|
||||
; CHECK-NEXT: vmov.32 q0[0], r2
|
||||
; CHECK-NEXT: vmov.32 q1[0], r3
|
||||
; CHECK-NEXT: vmov.32 q0[2], r12
|
||||
; CHECK-NEXT: vmov.f64 d4, d1
|
||||
; CHECK-NEXT: vmov.32 q1[2], r0
|
||||
; CHECK-NEXT: vmov.f32 s9, s3
|
||||
; CHECK-NEXT: vmov.f32 s2, s4
|
||||
; CHECK-NEXT: vmov.f32 s3, s6
|
||||
; CHECK-NEXT: vmov.f32 s3, s5
|
||||
; CHECK-NEXT: vmov.f32 s10, s6
|
||||
; CHECK-NEXT: vmov.f32 s1, s2
|
||||
; CHECK-NEXT: vmov.f32 s11, s7
|
||||
; CHECK-NEXT: vmov.f32 s2, s8
|
||||
; CHECK-NEXT: vmov.f32 s3, s10
|
||||
; CHECK-NEXT: vstrw.32 q0, [r1]
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
|
@ -111,12 +113,14 @@ entry:
|
|||
define void @vst2_v2i16(<2 x i16> *%src, <4 x i16> *%dst) {
|
||||
; CHECK-LABEL: vst2_v2i16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: ldrh r2, [r0]
|
||||
; CHECK-NEXT: ldrh r3, [r0, #2]
|
||||
; CHECK-NEXT: ldrh.w r12, [r0, #4]
|
||||
; CHECK-NEXT: ldrh r0, [r0, #6]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r0, r12
|
||||
; CHECK-NEXT: ldrh r3, [r0]
|
||||
; CHECK-NEXT: ldrh r2, [r0, #4]
|
||||
; CHECK-NEXT: vmov.32 q0[0], r3
|
||||
; CHECK-NEXT: ldrh.w r12, [r0, #6]
|
||||
; CHECK-NEXT: ldrh r0, [r0, #2]
|
||||
; CHECK-NEXT: vmov.32 q0[1], r2
|
||||
; CHECK-NEXT: vmov.32 q0[2], r0
|
||||
; CHECK-NEXT: vmov.32 q0[3], r12
|
||||
; CHECK-NEXT: vstrh.32 q0, [r1]
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
|
@ -193,11 +197,13 @@ define void @vst2_v2i8(<2 x i8> *%src, <4 x i8> *%dst) {
|
|||
; CHECK-LABEL: vst2_v2i8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: ldrb r2, [r0]
|
||||
; CHECK-NEXT: ldrb r3, [r0, #1]
|
||||
; CHECK-NEXT: ldrb.w r12, [r0, #2]
|
||||
; CHECK-NEXT: ldrb r3, [r0, #2]
|
||||
; CHECK-NEXT: vmov.32 q0[0], r2
|
||||
; CHECK-NEXT: ldrb.w r12, [r0, #1]
|
||||
; CHECK-NEXT: vmov.32 q0[1], r3
|
||||
; CHECK-NEXT: ldrb r0, [r0, #3]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r0, r12
|
||||
; CHECK-NEXT: vmov.32 q0[2], r12
|
||||
; CHECK-NEXT: vmov.32 q0[3], r0
|
||||
; CHECK-NEXT: vstrb.32 q0, [r1]
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
|
|
|
@ -8,13 +8,14 @@ define void @vst3_v2i32(<2 x i32> *%src, <6 x i32> *%dst) {
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, lr}
|
||||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: ldrd lr, r12, [r0]
|
||||
; CHECK-NEXT: ldrd r3, r2, [r0, #8]
|
||||
; CHECK-NEXT: ldm.w r0, {r2, r3, r12, lr}
|
||||
; CHECK-NEXT: ldrd r4, r0, [r0, #16]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, lr
|
||||
; CHECK-NEXT: vmov.32 q1[0], r2
|
||||
; CHECK-NEXT: vmov.32 q1[1], r3
|
||||
; CHECK-NEXT: vmov.32 q0[0], r4
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r2, r12
|
||||
; CHECK-NEXT: vmov.32 q1[2], r12
|
||||
; CHECK-NEXT: vmov.32 q0[1], r0
|
||||
; CHECK-NEXT: vmov.32 q1[3], lr
|
||||
; CHECK-NEXT: vmov.f32 s8, s7
|
||||
; CHECK-NEXT: vmov.f32 s10, s1
|
||||
; CHECK-NEXT: vmov r2, s8
|
||||
|
@ -301,16 +302,18 @@ define void @vst3_v2i16(<2 x i16> *%src, <6 x i16> *%dst) {
|
|||
; CHECK-NEXT: .save {r4, lr}
|
||||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: ldrh r2, [r0, #6]
|
||||
; CHECK-NEXT: ldrh.w lr, [r0, #4]
|
||||
; CHECK-NEXT: ldrh r3, [r0, #4]
|
||||
; CHECK-NEXT: ldrh.w r12, [r0, #8]
|
||||
; CHECK-NEXT: vmov.16 q0[4], r2
|
||||
; CHECK-NEXT: ldrh r3, [r0, #2]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r2, lr
|
||||
; CHECK-NEXT: ldrh.w lr, [r0, #2]
|
||||
; CHECK-NEXT: vmov.32 q1[0], r3
|
||||
; CHECK-NEXT: ldrh r4, [r0]
|
||||
; CHECK-NEXT: vmov.32 q1[2], r2
|
||||
; CHECK-NEXT: ldrh r0, [r0, #10]
|
||||
; CHECK-NEXT: vmov.16 q0[5], r0
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r3, r4
|
||||
; CHECK-NEXT: vmov.32 q0[0], r4
|
||||
; CHECK-NEXT: vmov.32 q0[2], lr
|
||||
; CHECK-NEXT: vmov.f32 s1, s4
|
||||
; CHECK-NEXT: vdup.32 q1, r12
|
||||
; CHECK-NEXT: vmov.f32 s3, s2
|
||||
|
@ -686,8 +689,9 @@ define void @vst3_v2i8(<2 x i8> *%src, <6 x i8> *%dst) {
|
|||
; CHECK-NEXT: ldrb r2, [r0]
|
||||
; CHECK-NEXT: movs r6, #0
|
||||
; CHECK-NEXT: ldrb r3, [r0, #1]
|
||||
; CHECK-NEXT: vmov.32 q0[0], r2
|
||||
; CHECK-NEXT: ldrb.w r12, [r0, #2]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
|
||||
; CHECK-NEXT: vmov.32 q0[2], r3
|
||||
; CHECK-NEXT: ldrb.w lr, [r0, #3]
|
||||
; CHECK-NEXT: vmov r4, s0
|
||||
; CHECK-NEXT: ldrb r5, [r0, #5]
|
||||
|
@ -1457,21 +1461,23 @@ entry:
|
|||
define void @vst3_v4f16(<4 x half> *%src, <12 x half> *%dst) {
|
||||
; CHECK-LABEL: vst3_v4f16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, lr}
|
||||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: ldrd lr, r12, [r0]
|
||||
; CHECK-NEXT: ldrd r3, r2, [r0, #8]
|
||||
; CHECK-NEXT: ldrd r4, r0, [r0, #16]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r3, lr
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r2, r12
|
||||
; CHECK-NEXT: vmov.32 q1[0], r4
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: ldm.w r0, {r2, r3, r12, lr}
|
||||
; CHECK-NEXT: vmov.32 q0[0], r2
|
||||
; CHECK-NEXT: vmov.32 q0[1], r3
|
||||
; CHECK-NEXT: vmov.32 q0[2], r12
|
||||
; CHECK-NEXT: vmov.32 q0[3], lr
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov.32 q1[1], r0
|
||||
; CHECK-NEXT: vmovx.f16 s12, s0
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov.16 q2[0], r3
|
||||
; CHECK-NEXT: vmov.16 q2[1], r2
|
||||
; CHECK-NEXT: ldrd r2, r0, [r0, #16]
|
||||
; CHECK-NEXT: vmovx.f16 s0, s3
|
||||
; CHECK-NEXT: vmov.32 q1[0], r2
|
||||
; CHECK-NEXT: vmov.32 q1[1], r0
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmovx.f16 s12, s0
|
||||
; CHECK-NEXT: vmov.16 q2[2], r0
|
||||
; CHECK-NEXT: vmov r0, s12
|
||||
; CHECK-NEXT: vmovx.f16 s12, s2
|
||||
|
@ -1480,7 +1486,6 @@ define void @vst3_v4f16(<4 x half> *%src, <12 x half> *%dst) {
|
|||
; CHECK-NEXT: vmovx.f16 s12, s4
|
||||
; CHECK-NEXT: vmov.16 q2[4], r0
|
||||
; CHECK-NEXT: vmov r0, s12
|
||||
; CHECK-NEXT: vmovx.f16 s0, s3
|
||||
; CHECK-NEXT: vmov.16 q2[5], r0
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: vmov.16 q2[6], r0
|
||||
|
@ -1500,7 +1505,7 @@ define void @vst3_v4f16(<4 x half> *%src, <12 x half> *%dst) {
|
|||
; CHECK-NEXT: vmov r0, s9
|
||||
; CHECK-NEXT: vmov r2, s8
|
||||
; CHECK-NEXT: strd r2, r0, [r1, #16]
|
||||
; CHECK-NEXT: pop {r4, pc}
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
entry:
|
||||
%s1 = getelementptr <4 x half>, <4 x half>* %src, i32 0
|
||||
%l1 = load <4 x half>, <4 x half>* %s1, align 4
|
||||
|
|
|
@ -8,16 +8,18 @@ define void @vst4_v2i32(<2 x i32> *%src, <8 x i32> *%dst) {
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, lr}
|
||||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: ldrd lr, r12, [r0]
|
||||
; CHECK-NEXT: ldrd r3, r2, [r0, #8]
|
||||
; CHECK-NEXT: ldm.w r0, {r2, r3, r12, lr}
|
||||
; CHECK-NEXT: ldrd r4, r0, [r0, #16]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r0, r4
|
||||
; CHECK-NEXT: vmov.32 q1[0], r4
|
||||
; CHECK-NEXT: vmov.32 q1[2], r0
|
||||
; CHECK-NEXT: vmov.f64 d0, d2
|
||||
; CHECK-NEXT: vmov.f32 s1, s6
|
||||
; CHECK-NEXT: vmov.f32 s2, s4
|
||||
; CHECK-NEXT: vmov.f32 s3, s6
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, lr
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r2, r12
|
||||
; CHECK-NEXT: vmov.32 q1[0], r2
|
||||
; CHECK-NEXT: vmov.32 q1[1], r3
|
||||
; CHECK-NEXT: vmov.32 q1[2], r12
|
||||
; CHECK-NEXT: vmov.32 q1[3], lr
|
||||
; CHECK-NEXT: vmov.f64 d4, d2
|
||||
; CHECK-NEXT: vmov.f32 s9, s6
|
||||
; CHECK-NEXT: vmov.f32 s10, s0
|
||||
|
@ -207,22 +209,23 @@ define void @vst4_v2i16(<2 x i16> *%src, <8 x i16> *%dst) {
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, lr}
|
||||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: ldrh r2, [r0]
|
||||
; CHECK-NEXT: ldrh.w r12, [r0, #4]
|
||||
; CHECK-NEXT: ldrh r4, [r0]
|
||||
; CHECK-NEXT: ldrh.w lr, [r0, #4]
|
||||
; CHECK-NEXT: ldrh r3, [r0, #8]
|
||||
; CHECK-NEXT: ldrh.w lr, [r0, #6]
|
||||
; CHECK-NEXT: ldrh r4, [r0, #10]
|
||||
; CHECK-NEXT: vmov.32 q0[0], r4
|
||||
; CHECK-NEXT: ldrh.w r12, [r0, #6]
|
||||
; CHECK-NEXT: ldrh r2, [r0, #10]
|
||||
; CHECK-NEXT: ldrh r0, [r0, #2]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, r2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov.16 q0[0], r2
|
||||
; CHECK-NEXT: vmov.16 q0[1], r12
|
||||
; CHECK-NEXT: vmov.32 q0[2], r0
|
||||
; CHECK-NEXT: vmov r4, s0
|
||||
; CHECK-NEXT: vmov.16 q0[0], r4
|
||||
; CHECK-NEXT: vmov.16 q0[1], lr
|
||||
; CHECK-NEXT: vmov.16 q0[2], r3
|
||||
; CHECK-NEXT: vmov.16 q0[3], r3
|
||||
; CHECK-NEXT: vmov.16 q0[4], r0
|
||||
; CHECK-NEXT: vmov.16 q0[5], lr
|
||||
; CHECK-NEXT: vmov.16 q0[6], r4
|
||||
; CHECK-NEXT: vmov.16 q0[7], r4
|
||||
; CHECK-NEXT: vmov.16 q0[5], r12
|
||||
; CHECK-NEXT: vmov.16 q0[6], r2
|
||||
; CHECK-NEXT: vmov.16 q0[7], r2
|
||||
; CHECK-NEXT: vstrw.32 q0, [r1]
|
||||
; CHECK-NEXT: pop {r4, pc}
|
||||
entry:
|
||||
|
@ -373,8 +376,9 @@ define void @vst4_v2i8(<2 x i8> *%src, <8 x i8> *%dst) {
|
|||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: ldrb r2, [r0]
|
||||
; CHECK-NEXT: ldrb r3, [r0, #1]
|
||||
; CHECK-NEXT: vmov.32 q0[0], r2
|
||||
; CHECK-NEXT: ldrb.w r12, [r0, #2]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
|
||||
; CHECK-NEXT: vmov.32 q0[2], r3
|
||||
; CHECK-NEXT: ldrb.w lr, [r0, #3]
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: ldrb r4, [r0, #5]
|
||||
|
@ -907,58 +911,61 @@ entry:
|
|||
define void @vst4_v4f16(<4 x half> *%src, <16 x half> *%dst) {
|
||||
; CHECK-LABEL: vst4_v4f16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, lr}
|
||||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: ldrd lr, r12, [r0]
|
||||
; CHECK-NEXT: ldrd r3, r2, [r0, #8]
|
||||
; CHECK-NEXT: ldrd r4, r0, [r0, #16]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, lr
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r2, r12
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r4, r4
|
||||
; CHECK-NEXT: vmov r3, s5
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r0, r0
|
||||
; CHECK-NEXT: vmov r2, s7
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: ldm.w r0, {r2, r3, r12, lr}
|
||||
; CHECK-NEXT: vmov.32 q0[0], r2
|
||||
; CHECK-NEXT: vmov.32 q0[1], r3
|
||||
; CHECK-NEXT: vmov.32 q0[2], r12
|
||||
; CHECK-NEXT: vmov.32 q0[3], lr
|
||||
; CHECK-NEXT: vmov r3, s1
|
||||
; CHECK-NEXT: vmovx.f16 s12, s1
|
||||
; CHECK-NEXT: vmov r2, s3
|
||||
; CHECK-NEXT: vmov.16 q2[0], r3
|
||||
; CHECK-NEXT: vmov.16 q2[1], r2
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: ldrd r2, r0, [r0, #16]
|
||||
; CHECK-NEXT: vmov.32 q1[0], r2
|
||||
; CHECK-NEXT: vmov.32 q1[1], r0
|
||||
; CHECK-NEXT: vmov.32 q1[2], r2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov.32 q1[3], r0
|
||||
; CHECK-NEXT: vmov r0, s5
|
||||
; CHECK-NEXT: vmov.16 q2[2], r0
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: vmovx.f16 s12, s5
|
||||
; CHECK-NEXT: vmov r0, s7
|
||||
; CHECK-NEXT: vmov.16 q2[3], r0
|
||||
; CHECK-NEXT: vmov r0, s12
|
||||
; CHECK-NEXT: vmovx.f16 s12, s7
|
||||
; CHECK-NEXT: vmov.16 q2[4], r0
|
||||
; CHECK-NEXT: vmov r0, s12
|
||||
; CHECK-NEXT: vmovx.f16 s12, s1
|
||||
; CHECK-NEXT: vmov.16 q2[5], r0
|
||||
; CHECK-NEXT: vmov r0, s12
|
||||
; CHECK-NEXT: vmovx.f16 s12, s3
|
||||
; CHECK-NEXT: vmov.16 q2[4], r0
|
||||
; CHECK-NEXT: vmov r0, s12
|
||||
; CHECK-NEXT: vmovx.f16 s12, s5
|
||||
; CHECK-NEXT: vmov.16 q2[5], r0
|
||||
; CHECK-NEXT: vmov r0, s12
|
||||
; CHECK-NEXT: vmovx.f16 s12, s7
|
||||
; CHECK-NEXT: vmov.16 q2[6], r0
|
||||
; CHECK-NEXT: vmov r0, s12
|
||||
; CHECK-NEXT: vmovx.f16 s12, s0
|
||||
; CHECK-NEXT: vmov.16 q2[7], r0
|
||||
; CHECK-NEXT: vmov r2, s4
|
||||
; CHECK-NEXT: vstrw.32 q2, [r1, #16]
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov.16 q2[0], r2
|
||||
; CHECK-NEXT: vmovx.f16 s12, s4
|
||||
; CHECK-NEXT: vmov.16 q2[1], r0
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: vmov.16 q2[2], r0
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: vstrw.32 q2, [r1, #16]
|
||||
; CHECK-NEXT: vmov.16 q2[0], r2
|
||||
; CHECK-NEXT: vmov.16 q2[1], r0
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmov.16 q2[2], r0
|
||||
; CHECK-NEXT: vmov r0, s6
|
||||
; CHECK-NEXT: vmov.16 q2[3], r0
|
||||
; CHECK-NEXT: vmov r0, s12
|
||||
; CHECK-NEXT: vmovx.f16 s4, s6
|
||||
; CHECK-NEXT: vmov.16 q2[4], r0
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmovx.f16 s4, s0
|
||||
; CHECK-NEXT: vmov.16 q2[5], r0
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vmovx.f16 s0, s2
|
||||
; CHECK-NEXT: vmov.16 q2[4], r0
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: vmovx.f16 s0, s4
|
||||
; CHECK-NEXT: vmov.16 q2[5], r0
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: vmovx.f16 s0, s6
|
||||
; CHECK-NEXT: vmov.16 q2[6], r0
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: vmov.16 q2[7], r0
|
||||
; CHECK-NEXT: vstrw.32 q2, [r1]
|
||||
; CHECK-NEXT: pop {r4, pc}
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
entry:
|
||||
%s1 = getelementptr <4 x half>, <4 x half>* %src, i32 0
|
||||
%l1 = load <4 x half>, <4 x half>* %s1, align 4
|
||||
|
|
|
@ -276,36 +276,44 @@ define void @foo_int32_int8_both(<16 x i32>* %dest, <16 x i8>* readonly %src, i3
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vldrb.s16 q1, [r1, #8]
|
||||
; CHECK-NEXT: vmov.u16 r2, q1[4]
|
||||
; CHECK-NEXT: vmov.u16 r3, q1[6]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
|
||||
; CHECK-NEXT: vmov.32 q0[0], r2
|
||||
; CHECK-NEXT: vmov.u16 r2, q1[5]
|
||||
; CHECK-NEXT: vmov.u16 r3, q1[7]
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
|
||||
; CHECK-NEXT: vmov.u16 r2, q1[2]
|
||||
; CHECK-NEXT: vmov.32 q0[1], r2
|
||||
; CHECK-NEXT: vmov.u16 r2, q1[6]
|
||||
; CHECK-NEXT: vmov.32 q0[2], r2
|
||||
; CHECK-NEXT: vmov.u16 r2, q1[7]
|
||||
; CHECK-NEXT: vmov.32 q0[3], r2
|
||||
; CHECK-NEXT: vmovlb.u16 q2, q0
|
||||
; CHECK-NEXT: vldrb.s16 q0, [r1]
|
||||
; CHECK-NEXT: vmov.u16 r1, q1[0]
|
||||
; CHECK-NEXT: vstrw.32 q2, [r0, #48]
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r2, r1
|
||||
; CHECK-NEXT: vmov.32 q2[0], r1
|
||||
; CHECK-NEXT: vmov.u16 r1, q1[1]
|
||||
; CHECK-NEXT: vmov.u16 r2, q1[3]
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r2, r1
|
||||
; CHECK-NEXT: vmov.32 q2[1], r1
|
||||
; CHECK-NEXT: vmov.u16 r1, q1[2]
|
||||
; CHECK-NEXT: vmov.32 q2[2], r1
|
||||
; CHECK-NEXT: vmov.u16 r1, q1[3]
|
||||
; CHECK-NEXT: vmov.32 q2[3], r1
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[4]
|
||||
; CHECK-NEXT: vmovlb.u16 q1, q2
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[6]
|
||||
; CHECK-NEXT: vstrw.32 q1, [r0, #32]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r2, r1
|
||||
; CHECK-NEXT: vmov.32 q1[0], r1
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[5]
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[7]
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r2, r1
|
||||
; CHECK-NEXT: vmov.32 q1[1], r1
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[6]
|
||||
; CHECK-NEXT: vmov.32 q1[2], r1
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[7]
|
||||
; CHECK-NEXT: vmov.32 q1[3], r1
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[0]
|
||||
; CHECK-NEXT: vmovlb.u16 q1, q1
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[2]
|
||||
; CHECK-NEXT: vstrw.32 q1, [r0, #16]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r2, r1
|
||||
; CHECK-NEXT: vmov.32 q1[0], r1
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[1]
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[3]
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r2, r1
|
||||
; CHECK-NEXT: vmov.32 q1[1], r1
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[2]
|
||||
; CHECK-NEXT: vmov.32 q1[2], r1
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[3]
|
||||
; CHECK-NEXT: vmov.32 q1[3], r1
|
||||
; CHECK-NEXT: vmovlb.u16 q0, q1
|
||||
; CHECK-NEXT: vstrw.32 q0, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
|
|
Loading…
Reference in New Issue