[ARM] MVE VMOV.i64

In the original batch of MVE VMOVimm code generation, VMOV.i64 was left
out due to the way it was done downstream. It turns out to be fairly
simple, though. This adds the codegen for it, similar to NEON.

Big-endian is technically incorrect in this version, which John is fixing
in a NEON patch.
This commit is contained in:
David Green 2020-03-28 16:22:05 +00:00
parent b4695351cb
commit c9eaed5149
8 changed files with 60 additions and 366 deletions

View File

@ -7181,7 +7181,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
return DAG.getUNDEF(VT);
if ((ST->hasNEON() && SplatBitSize <= 64) ||
(ST->hasMVEIntegerOps() && SplatBitSize <= 32)) {
(ST->hasMVEIntegerOps() && SplatBitSize <= 64)) {
// Check if an immediate VMOV works.
EVT VmovVT;
SDValue Val = isVMOVModifiedImm(SplatBits.getZExtValue(),

View File

@ -2389,6 +2389,8 @@ let Predicates = [HasMVEInt] in {
(v8i16 (MVE_VMOVimmi16 nImmSplatI16:$simm))>;
def : Pat<(v4i32 (ARMvmovImm timm:$simm)),
(v4i32 (MVE_VMOVimmi32 nImmVMOVI32:$simm))>;
def : Pat<(v2i64 (ARMvmovImm timm:$simm)),
(v2i64 (MVE_VMOVimmi64 nImmSplatI64:$simm))>;
def : Pat<(v8i16 (ARMvmvnImm timm:$simm)),
(v8i16 (MVE_VMVNimmi16 nImmSplatI16:$simm))>;

View File

@ -251,21 +251,13 @@ define arm_aapcs_vfpcc <2 x i32> @ptr_v2i16_zext(<2 x i16*>* %offptr) {
; CHECK-LABEL: ptr_v2i16_zext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrd r1, r0, [r0]
; CHECK-NEXT: adr r2, .LCPI9_0
; CHECK-NEXT: vmov.i64 q0, #0xffff
; CHECK-NEXT: ldrh r0, [r0]
; CHECK-NEXT: vldrw.u32 q0, [r2]
; CHECK-NEXT: ldrh r1, [r1]
; CHECK-NEXT: vmov.32 q1[0], r1
; CHECK-NEXT: vmov.32 q1[2], r0
; CHECK-NEXT: vand q0, q1, q0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI9_0:
; CHECK-NEXT: .long 65535 @ 0xffff
; CHECK-NEXT: .long 0 @ 0x0
; CHECK-NEXT: .long 65535 @ 0xffff
; CHECK-NEXT: .long 0 @ 0x0
entry:
%offs = load <2 x i16*>, <2 x i16*>* %offptr, align 4
%gather = call <2 x i16> @llvm.masked.gather.v2i16.v2p0i16(<2 x i16*> %offs, i32 2, <2 x i1> <i1 true, i1 true>, <2 x i16> undef)

View File

@ -408,6 +408,7 @@ define void @foo_zext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32>
; CHECK-LE-NEXT: ldrd lr, r12, [r1]
; CHECK-LE-NEXT: movs r1, #0
; CHECK-LE-NEXT: @ implicit-def: $q1
; CHECK-LE-NEXT: vmov.i64 q2, #0xffffffff
; CHECK-LE-NEXT: rsbs.w r3, lr, #0
; CHECK-LE-NEXT: vmov.32 q0[0], lr
; CHECK-LE-NEXT: sbcs.w r3, r1, lr, asr #31
@ -424,23 +425,21 @@ define void @foo_zext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32>
; CHECK-LE-NEXT: bfi r1, lr, #0, #1
; CHECK-LE-NEXT: vmov.32 q0[2], r12
; CHECK-LE-NEXT: and r3, r1, #3
; CHECK-LE-NEXT: adr.w r12, .LCPI7_0
; CHECK-LE-NEXT: mov.w r12, #0
; CHECK-LE-NEXT: lsls r1, r1, #31
; CHECK-LE-NEXT: itt ne
; CHECK-LE-NEXT: ldrne r1, [r2]
; CHECK-LE-NEXT: vmovne.32 q1[0], r1
; CHECK-LE-NEXT: lsls r1, r3, #30
; CHECK-LE-NEXT: vmov r3, s0
; CHECK-LE-NEXT: itt mi
; CHECK-LE-NEXT: ldrmi r1, [r2, #4]
; CHECK-LE-NEXT: vmovmi.32 q1[2], r1
; CHECK-LE-NEXT: vmov r1, s0
; CHECK-LE-NEXT: movs r2, #0
; CHECK-LE-NEXT: vldrw.u32 q2, [r12]
; CHECK-LE-NEXT: mov.w r12, #0
; CHECK-LE-NEXT: vand q1, q1, q2
; CHECK-LE-NEXT: rsbs r1, r3, #0
; CHECK-LE-NEXT: sbcs.w r1, r2, r3, asr #31
; CHECK-LE-NEXT: rsbs r3, r1, #0
; CHECK-LE-NEXT: vmov r3, s2
; CHECK-LE-NEXT: sbcs.w r1, r2, r1, asr #31
; CHECK-LE-NEXT: it lt
; CHECK-LE-NEXT: movlt.w r12, #1
; CHECK-LE-NEXT: rsbs r1, r3, #0
@ -460,13 +459,6 @@ define void @foo_zext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32>
; CHECK-LE-NEXT: vstrmi d3, [r0, #8]
; CHECK-LE-NEXT: add sp, #4
; CHECK-LE-NEXT: pop {r7, pc}
; CHECK-LE-NEXT: .p2align 4
; CHECK-LE-NEXT: @ %bb.1:
; CHECK-LE-NEXT: .LCPI7_0:
; CHECK-LE-NEXT: .long 4294967295 @ 0xffffffff
; CHECK-LE-NEXT: .long 0 @ 0x0
; CHECK-LE-NEXT: .long 4294967295 @ 0xffffffff
; CHECK-LE-NEXT: .long 0 @ 0x0
;
; CHECK-BE-LABEL: foo_zext_v2i64_v2i32:
; CHECK-BE: @ %bb.0: @ %entry
@ -511,15 +503,13 @@ define void @foo_zext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32>
; CHECK-BE-NEXT: .LBB7_4: @ %else2
; CHECK-BE-NEXT: vrev64.32 q3, q2
; CHECK-BE-NEXT: movs r2, #0
; CHECK-BE-NEXT: vmov r3, s15
; CHECK-BE-NEXT: adr.w r12, .LCPI7_0
; CHECK-BE-NEXT: vldrb.u8 q0, [r12]
; CHECK-BE-NEXT: vmov r1, s15
; CHECK-BE-NEXT: mov.w r12, #0
; CHECK-BE-NEXT: vrev64.8 q2, q0
; CHECK-BE-NEXT: vand q0, q1, q2
; CHECK-BE-NEXT: rsbs r1, r3, #0
; CHECK-BE-NEXT: sbcs.w r1, r2, r3, asr #31
; CHECK-BE-NEXT: vmov.i64 q0, #0xffffffff
; CHECK-BE-NEXT: vand q0, q1, q0
; CHECK-BE-NEXT: rsbs r3, r1, #0
; CHECK-BE-NEXT: vmov r3, s13
; CHECK-BE-NEXT: sbcs.w r1, r2, r1, asr #31
; CHECK-BE-NEXT: it lt
; CHECK-BE-NEXT: movlt.w r12, #1
; CHECK-BE-NEXT: rsbs r1, r3, #0
@ -539,13 +529,6 @@ define void @foo_zext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32>
; CHECK-BE-NEXT: vstrmi d1, [r0, #8]
; CHECK-BE-NEXT: add sp, #4
; CHECK-BE-NEXT: pop {r7, pc}
; CHECK-BE-NEXT: .p2align 4
; CHECK-BE-NEXT: @ %bb.5:
; CHECK-BE-NEXT: .LCPI7_0:
; CHECK-BE-NEXT: .long 0 @ 0x0
; CHECK-BE-NEXT: .long 4294967295 @ 0xffffffff
; CHECK-BE-NEXT: .long 0 @ 0x0
; CHECK-BE-NEXT: .long 4294967295 @ 0xffffffff
entry:
%0 = load <2 x i32>, <2 x i32>* %mask, align 4
%1 = icmp sgt <2 x i32> %0, zeroinitializer
@ -565,6 +548,7 @@ define void @foo_zext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask,
; CHECK-LE-NEXT: ldrd lr, r12, [r1]
; CHECK-LE-NEXT: movs r1, #0
; CHECK-LE-NEXT: @ implicit-def: $q1
; CHECK-LE-NEXT: vmov.i64 q2, #0xffffffff
; CHECK-LE-NEXT: rsbs.w r3, lr, #0
; CHECK-LE-NEXT: vmov.32 q0[0], lr
; CHECK-LE-NEXT: sbcs.w r3, r1, lr, asr #31
@ -581,23 +565,21 @@ define void @foo_zext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask,
; CHECK-LE-NEXT: bfi r1, lr, #0, #1
; CHECK-LE-NEXT: vmov.32 q0[2], r12
; CHECK-LE-NEXT: and r3, r1, #3
; CHECK-LE-NEXT: adr.w r12, .LCPI8_0
; CHECK-LE-NEXT: mov.w r12, #0
; CHECK-LE-NEXT: lsls r1, r1, #31
; CHECK-LE-NEXT: itt ne
; CHECK-LE-NEXT: ldrne r1, [r2]
; CHECK-LE-NEXT: vmovne.32 q1[0], r1
; CHECK-LE-NEXT: lsls r1, r3, #30
; CHECK-LE-NEXT: vmov r3, s0
; CHECK-LE-NEXT: itt mi
; CHECK-LE-NEXT: ldrmi r1, [r2, #4]
; CHECK-LE-NEXT: vmovmi.32 q1[2], r1
; CHECK-LE-NEXT: vmov r1, s0
; CHECK-LE-NEXT: movs r2, #0
; CHECK-LE-NEXT: vldrw.u32 q2, [r12]
; CHECK-LE-NEXT: mov.w r12, #0
; CHECK-LE-NEXT: vand q1, q1, q2
; CHECK-LE-NEXT: rsbs r1, r3, #0
; CHECK-LE-NEXT: sbcs.w r1, r2, r3, asr #31
; CHECK-LE-NEXT: rsbs r3, r1, #0
; CHECK-LE-NEXT: vmov r3, s2
; CHECK-LE-NEXT: sbcs.w r1, r2, r1, asr #31
; CHECK-LE-NEXT: it lt
; CHECK-LE-NEXT: movlt.w r12, #1
; CHECK-LE-NEXT: rsbs r1, r3, #0
@ -619,13 +601,6 @@ define void @foo_zext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask,
; CHECK-LE-NEXT: strdmi r1, r2, [r0, #8]
; CHECK-LE-NEXT: add sp, #4
; CHECK-LE-NEXT: pop {r7, pc}
; CHECK-LE-NEXT: .p2align 4
; CHECK-LE-NEXT: @ %bb.1:
; CHECK-LE-NEXT: .LCPI8_0:
; CHECK-LE-NEXT: .long 4294967295 @ 0xffffffff
; CHECK-LE-NEXT: .long 0 @ 0x0
; CHECK-LE-NEXT: .long 4294967295 @ 0xffffffff
; CHECK-LE-NEXT: .long 0 @ 0x0
;
; CHECK-BE-LABEL: foo_zext_v2i64_v2i32_unaligned:
; CHECK-BE: @ %bb.0: @ %entry
@ -670,15 +645,13 @@ define void @foo_zext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask,
; CHECK-BE-NEXT: .LBB8_4: @ %else2
; CHECK-BE-NEXT: vrev64.32 q3, q2
; CHECK-BE-NEXT: movs r2, #0
; CHECK-BE-NEXT: vmov r3, s15
; CHECK-BE-NEXT: adr.w r12, .LCPI8_0
; CHECK-BE-NEXT: vldrb.u8 q0, [r12]
; CHECK-BE-NEXT: vmov r1, s15
; CHECK-BE-NEXT: mov.w r12, #0
; CHECK-BE-NEXT: vrev64.8 q2, q0
; CHECK-BE-NEXT: vand q0, q1, q2
; CHECK-BE-NEXT: rsbs r1, r3, #0
; CHECK-BE-NEXT: sbcs.w r1, r2, r3, asr #31
; CHECK-BE-NEXT: vmov.i64 q0, #0xffffffff
; CHECK-BE-NEXT: vand q0, q1, q0
; CHECK-BE-NEXT: rsbs r3, r1, #0
; CHECK-BE-NEXT: vmov r3, s13
; CHECK-BE-NEXT: sbcs.w r1, r2, r1, asr #31
; CHECK-BE-NEXT: it lt
; CHECK-BE-NEXT: movlt.w r12, #1
; CHECK-BE-NEXT: rsbs r1, r3, #0
@ -700,13 +673,6 @@ define void @foo_zext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask,
; CHECK-BE-NEXT: strdmi r2, r1, [r0, #8]
; CHECK-BE-NEXT: add sp, #4
; CHECK-BE-NEXT: pop {r7, pc}
; CHECK-BE-NEXT: .p2align 4
; CHECK-BE-NEXT: @ %bb.5:
; CHECK-BE-NEXT: .LCPI8_0:
; CHECK-BE-NEXT: .long 0 @ 0x0
; CHECK-BE-NEXT: .long 4294967295 @ 0xffffffff
; CHECK-BE-NEXT: .long 0 @ 0x0
; CHECK-BE-NEXT: .long 4294967295 @ 0xffffffff
entry:
%0 = load <2 x i32>, <2 x i32>* %mask, align 4
%1 = icmp sgt <2 x i32> %0, zeroinitializer

View File

@ -430,17 +430,9 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @zext_v2i32_v2i64(<2 x i32> %src) {
; CHECK-LABEL: zext_v2i32_v2i64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adr r0, .LCPI20_0
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: vmov.i64 q1, #0xffffffff
; CHECK-NEXT: vand q0, q0, q1
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI20_0:
; CHECK-NEXT: .long 4294967295 @ 0xffffffff
; CHECK-NEXT: .long 0 @ 0x0
; CHECK-NEXT: .long 4294967295 @ 0xffffffff
; CHECK-NEXT: .long 0 @ 0x0
entry:
%0 = zext <2 x i32> %src to <2 x i64>
ret <2 x i64> %0

View File

@ -36,8 +36,7 @@ entry:
define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_zext(<2 x i32> %x) {
; CHECK-LABEL: add_v2i32_v2i64_zext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adr r0, .LCPI3_0
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: vmov.i64 q1, #0xffffffff
; CHECK-NEXT: vand q0, q0, q1
; CHECK-NEXT: vmov r0, s2
; CHECK-NEXT: vmov r3, s0
@ -46,13 +45,6 @@ define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_zext(<2 x i32> %x) {
; CHECK-NEXT: adds r0, r0, r3
; CHECK-NEXT: adcs r1, r2
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI3_0:
; CHECK-NEXT: .long 4294967295 @ 0xffffffff
; CHECK-NEXT: .long 0 @ 0x0
; CHECK-NEXT: .long 4294967295 @ 0xffffffff
; CHECK-NEXT: .long 0 @ 0x0
entry:
%xx = zext <2 x i32> %x to <2 x i64>
%z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %xx)
@ -138,11 +130,10 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_zext(<8 x i16> %x) {
; CHECK-LABEL: add_v8i16_v8i64_zext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.u16 r0, q0[0]
; CHECK-NEXT: vmov.i64 q1, #0xffff
; CHECK-NEXT: vmov.32 q2[0], r0
; CHECK-NEXT: vmov.u16 r0, q0[1]
; CHECK-NEXT: vmov.32 q2[2], r0
; CHECK-NEXT: adr r0, .LCPI10_0
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: vand q2, q2, q1
; CHECK-NEXT: vmov r0, s10
; CHECK-NEXT: vmov r1, s8
@ -182,13 +173,6 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_zext(<8 x i16> %x) {
; CHECK-NEXT: adds r0, r0, r3
; CHECK-NEXT: adcs r1, r2
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI10_0:
; CHECK-NEXT: .long 65535 @ 0xffff
; CHECK-NEXT: .long 0 @ 0x0
; CHECK-NEXT: .long 65535 @ 0xffff
; CHECK-NEXT: .long 0 @ 0x0
entry:
%xx = zext <8 x i16> %x to <8 x i64>
%z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %xx)
@ -265,21 +249,13 @@ entry:
define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_zext(<2 x i16> %x) {
; CHECK-LABEL: add_v2i16_v2i64_zext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adr r0, .LCPI12_0
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: vmov.i64 q1, #0xffff
; CHECK-NEXT: vand q0, q0, q1
; CHECK-NEXT: vmov r0, s2
; CHECK-NEXT: vmov r1, s0
; CHECK-NEXT: add r0, r1
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI12_0:
; CHECK-NEXT: .long 65535 @ 0xffff
; CHECK-NEXT: .long 0 @ 0x0
; CHECK-NEXT: .long 65535 @ 0xffff
; CHECK-NEXT: .long 0 @ 0x0
entry:
%xx = zext <2 x i16> %x to <2 x i64>
%z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %xx)
@ -489,11 +465,10 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_zext(<16 x i8> %x) {
; CHECK-LABEL: add_v16i8_v16i64_zext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.u8 r0, q0[0]
; CHECK-NEXT: vmov.i64 q1, #0xff
; CHECK-NEXT: vmov.32 q2[0], r0
; CHECK-NEXT: vmov.u8 r0, q0[1]
; CHECK-NEXT: vmov.32 q2[2], r0
; CHECK-NEXT: adr r0, .LCPI23_0
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: vand q2, q2, q1
; CHECK-NEXT: vmov r0, s10
; CHECK-NEXT: vmov r1, s8
@ -585,13 +560,6 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_zext(<16 x i8> %x) {
; CHECK-NEXT: adds r0, r0, r3
; CHECK-NEXT: adcs r1, r2
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI23_0:
; CHECK-NEXT: .long 255 @ 0xff
; CHECK-NEXT: .long 0 @ 0x0
; CHECK-NEXT: .long 255 @ 0xff
; CHECK-NEXT: .long 0 @ 0x0
entry:
%xx = zext <16 x i8> %x to <16 x i64>
%z = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %xx)
@ -736,21 +704,13 @@ entry:
define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_zext(<2 x i8> %x) {
; CHECK-LABEL: add_v2i8_v2i64_zext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adr r0, .LCPI25_0
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: vmov.i64 q1, #0xff
; CHECK-NEXT: vand q0, q0, q1
; CHECK-NEXT: vmov r0, s2
; CHECK-NEXT: vmov r1, s0
; CHECK-NEXT: add r0, r1
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI25_0:
; CHECK-NEXT: .long 255 @ 0xff
; CHECK-NEXT: .long 0 @ 0x0
; CHECK-NEXT: .long 255 @ 0xff
; CHECK-NEXT: .long 0 @ 0x0
entry:
%xx = zext <2 x i8> %x to <2 x i64>
%z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %xx)
@ -832,8 +792,7 @@ define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_acc_zext(<2 x i32> %x, i64 %a) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: adr r2, .LCPI31_0
; CHECK-NEXT: vldrw.u32 q1, [r2]
; CHECK-NEXT: vmov.i64 q1, #0xffffffff
; CHECK-NEXT: vand q0, q0, q1
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: vmov r3, s0
@ -844,13 +803,6 @@ define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_acc_zext(<2 x i32> %x, i64 %a) {
; CHECK-NEXT: adds r0, r0, r2
; CHECK-NEXT: adcs r1, r3
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI31_0:
; CHECK-NEXT: .long 4294967295 @ 0xffffffff
; CHECK-NEXT: .long 0 @ 0x0
; CHECK-NEXT: .long 4294967295 @ 0xffffffff
; CHECK-NEXT: .long 0 @ 0x0
entry:
%xx = zext <2 x i32> %x to <2 x i64>
%z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %xx)
@ -947,11 +899,10 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_zext(<8 x i16> %x, i64 %a) {
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: vmov.u16 r2, q0[0]
; CHECK-NEXT: vmov.i64 q1, #0xffff
; CHECK-NEXT: vmov.32 q2[0], r2
; CHECK-NEXT: vmov.u16 r2, q0[1]
; CHECK-NEXT: vmov.32 q2[2], r2
; CHECK-NEXT: adr r2, .LCPI38_0
; CHECK-NEXT: vldrw.u32 q1, [r2]
; CHECK-NEXT: vand q2, q2, q1
; CHECK-NEXT: vmov r2, s10
; CHECK-NEXT: vmov r3, s8
@ -993,13 +944,6 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_zext(<8 x i16> %x, i64 %a) {
; CHECK-NEXT: adds r0, r0, r2
; CHECK-NEXT: adcs r1, r3
; CHECK-NEXT: pop {r4, pc}
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI38_0:
; CHECK-NEXT: .long 65535 @ 0xffff
; CHECK-NEXT: .long 0 @ 0x0
; CHECK-NEXT: .long 65535 @ 0xffff
; CHECK-NEXT: .long 0 @ 0x0
entry:
%xx = zext <8 x i16> %x to <8 x i64>
%z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %xx)
@ -1082,8 +1026,7 @@ entry:
define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_acc_zext(<2 x i16> %x, i64 %a) {
; CHECK-LABEL: add_v2i16_v2i64_acc_zext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adr r2, .LCPI40_0
; CHECK-NEXT: vldrw.u32 q1, [r2]
; CHECK-NEXT: vmov.i64 q1, #0xffff
; CHECK-NEXT: vand q0, q0, q1
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: vmov r3, s0
@ -1092,13 +1035,6 @@ define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_acc_zext(<2 x i16> %x, i64 %a) {
; CHECK-NEXT: adds r0, r0, r2
; CHECK-NEXT: adcs r1, r3
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI40_0:
; CHECK-NEXT: .long 65535 @ 0xffff
; CHECK-NEXT: .long 0 @ 0x0
; CHECK-NEXT: .long 65535 @ 0xffff
; CHECK-NEXT: .long 0 @ 0x0
entry:
%xx = zext <2 x i16> %x to <2 x i64>
%z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %xx)
@ -1323,11 +1259,10 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_zext(<16 x i8> %x, i64 %a) {
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: vmov.u8 r2, q0[0]
; CHECK-NEXT: vmov.i64 q1, #0xff
; CHECK-NEXT: vmov.32 q2[0], r2
; CHECK-NEXT: vmov.u8 r2, q0[1]
; CHECK-NEXT: vmov.32 q2[2], r2
; CHECK-NEXT: adr r2, .LCPI51_0
; CHECK-NEXT: vldrw.u32 q1, [r2]
; CHECK-NEXT: vand q2, q2, q1
; CHECK-NEXT: vmov r2, s10
; CHECK-NEXT: vmov r3, s8
@ -1421,13 +1356,6 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_zext(<16 x i8> %x, i64 %a) {
; CHECK-NEXT: adds r0, r0, r2
; CHECK-NEXT: adcs r1, r3
; CHECK-NEXT: pop {r4, pc}
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI51_0:
; CHECK-NEXT: .long 255 @ 0xff
; CHECK-NEXT: .long 0 @ 0x0
; CHECK-NEXT: .long 255 @ 0xff
; CHECK-NEXT: .long 0 @ 0x0
entry:
%xx = zext <16 x i8> %x to <16 x i64>
%z = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %xx)
@ -1578,8 +1506,7 @@ entry:
define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_acc_zext(<2 x i8> %x, i64 %a) {
; CHECK-LABEL: add_v2i8_v2i64_acc_zext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adr r2, .LCPI53_0
; CHECK-NEXT: vldrw.u32 q1, [r2]
; CHECK-NEXT: vmov.i64 q1, #0xff
; CHECK-NEXT: vand q0, q0, q1
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: vmov r3, s0
@ -1588,13 +1515,6 @@ define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_acc_zext(<2 x i8> %x, i64 %a) {
; CHECK-NEXT: adds r0, r0, r2
; CHECK-NEXT: adcs r1, r3
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI53_0:
; CHECK-NEXT: .long 255 @ 0xff
; CHECK-NEXT: .long 0 @ 0x0
; CHECK-NEXT: .long 255 @ 0xff
; CHECK-NEXT: .long 0 @ 0x0
entry:
%xx = zext <2 x i8> %x to <2 x i64>
%z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %xx)

View File

@ -172,8 +172,7 @@ entry:
define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_zext(<2 x i16> %x, <2 x i16> %y) {
; CHECK-LABEL: add_v2i16_v2i64_zext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adr r0, .LCPI12_0
; CHECK-NEXT: vldrw.u32 q2, [r0]
; CHECK-NEXT: vmov.i64 q2, #0xffff
; CHECK-NEXT: vand q1, q1, q2
; CHECK-NEXT: vand q0, q0, q2
; CHECK-NEXT: vmov r0, s4
@ -183,13 +182,6 @@ define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_zext(<2 x i16> %x, <2 x i16> %y) {
; CHECK-NEXT: umull r0, r1, r1, r0
; CHECK-NEXT: umlal r0, r1, r3, r2
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI12_0:
; CHECK-NEXT: .long 65535 @ 0xffff
; CHECK-NEXT: .long 0 @ 0x0
; CHECK-NEXT: .long 65535 @ 0xffff
; CHECK-NEXT: .long 0 @ 0x0
entry:
%xx = zext <2 x i16> %x to <2 x i64>
%yy = zext <2 x i16> %y to <2 x i64>
@ -507,11 +499,10 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_zext(<16 x i8> %x, <16 x i8> %y) {
; CHECK-NEXT: vmov.u8 r1, q0[0]
; CHECK-NEXT: vmov.32 q3[0], r0
; CHECK-NEXT: vmov.u8 r0, q1[1]
; CHECK-NEXT: vmov.32 q3[2], r0
; CHECK-NEXT: adr r0, .LCPI23_0
; CHECK-NEXT: vldrw.u32 q2, [r0]
; CHECK-NEXT: vmov.32 q4[0], r1
; CHECK-NEXT: vmov.u8 r1, q0[1]
; CHECK-NEXT: vmov.32 q3[2], r0
; CHECK-NEXT: vmov.i64 q2, #0xff
; CHECK-NEXT: vmov.32 q4[2], r1
; CHECK-NEXT: vand q3, q3, q2
; CHECK-NEXT: vand q4, q4, q2
@ -703,13 +694,6 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_zext(<16 x i8> %x, <16 x i8> %y) {
; CHECK-NEXT: umlal r0, r1, r3, r2
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI23_0:
; CHECK-NEXT: .long 255 @ 0xff
; CHECK-NEXT: .long 0 @ 0x0
; CHECK-NEXT: .long 255 @ 0xff
; CHECK-NEXT: .long 0 @ 0x0
entry:
%xx = zext <16 x i8> %x to <16 x i64>
%yy = zext <16 x i8> %y to <16 x i64>
@ -888,8 +872,7 @@ entry:
define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_zext(<2 x i8> %x, <2 x i8> %y) {
; CHECK-LABEL: add_v2i8_v2i64_zext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adr r0, .LCPI25_0
; CHECK-NEXT: vldrw.u32 q2, [r0]
; CHECK-NEXT: vmov.i64 q2, #0xff
; CHECK-NEXT: vand q1, q1, q2
; CHECK-NEXT: vand q0, q0, q2
; CHECK-NEXT: vmov r0, s6
@ -901,13 +884,6 @@ define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_zext(<2 x i8> %x, <2 x i8> %y) {
; CHECK-NEXT: add r0, r2
; CHECK-NEXT: orrs r1, r3
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI25_0:
; CHECK-NEXT: .long 255 @ 0xff
; CHECK-NEXT: .long 0 @ 0x0
; CHECK-NEXT: .long 255 @ 0xff
; CHECK-NEXT: .long 0 @ 0x0
entry:
%xx = zext <2 x i8> %x to <2 x i64>
%yy = zext <2 x i8> %y to <2 x i64>
@ -1162,8 +1138,7 @@ define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_acc_zext(<2 x i16> %x, <2 x i16> %y,
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: adr r2, .LCPI40_0
; CHECK-NEXT: vldrw.u32 q2, [r2]
; CHECK-NEXT: vmov.i64 q2, #0xffff
; CHECK-NEXT: vand q1, q1, q2
; CHECK-NEXT: vand q0, q0, q2
; CHECK-NEXT: vmov r2, s4
@ -1175,13 +1150,6 @@ define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_acc_zext(<2 x i16> %x, <2 x i16> %y,
; CHECK-NEXT: adds r0, r0, r2
; CHECK-NEXT: adc.w r1, r1, lr
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI40_0:
; CHECK-NEXT: .long 65535 @ 0xffff
; CHECK-NEXT: .long 0 @ 0x0
; CHECK-NEXT: .long 65535 @ 0xffff
; CHECK-NEXT: .long 0 @ 0x0
entry:
%xx = zext <2 x i16> %x to <2 x i64>
%yy = zext <2 x i16> %y to <2 x i64>
@ -1514,17 +1482,16 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_zext(<16 x i8> %x, <16 x i8> %y
; CHECK-NEXT: vmov.u8 r3, q0[0]
; CHECK-NEXT: vmov.32 q3[0], r2
; CHECK-NEXT: vmov.u8 r2, q1[1]
; CHECK-NEXT: vmov.32 q3[2], r2
; CHECK-NEXT: adr r2, .LCPI51_0
; CHECK-NEXT: vldrw.u32 q2, [r2]
; CHECK-NEXT: vmov.32 q4[0], r3
; CHECK-NEXT: vmov.u8 r3, q0[1]
; CHECK-NEXT: vmov.u8 r4, q0[2]
; CHECK-NEXT: vmov.32 q3[2], r2
; CHECK-NEXT: vmov.i64 q2, #0xff
; CHECK-NEXT: vmov.32 q4[2], r3
; CHECK-NEXT: vand q3, q3, q2
; CHECK-NEXT: vand q4, q4, q2
; CHECK-NEXT: vmov r2, s14
; CHECK-NEXT: vmov r3, s18
; CHECK-NEXT: vmov.u8 r4, q0[2]
; CHECK-NEXT: umull r12, lr, r3, r2
; CHECK-NEXT: vmov r3, s16
; CHECK-NEXT: vmov r2, s12
@ -1712,13 +1679,6 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_zext(<16 x i8> %x, <16 x i8> %y
; CHECK-NEXT: adcs r1, r3
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: pop {r4, r5, r7, pc}
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI51_0:
; CHECK-NEXT: .long 255 @ 0xff
; CHECK-NEXT: .long 0 @ 0x0
; CHECK-NEXT: .long 255 @ 0xff
; CHECK-NEXT: .long 0 @ 0x0
entry:
%xx = zext <16 x i8> %x to <16 x i64>
%yy = zext <16 x i8> %y to <16 x i64>
@ -1905,8 +1865,7 @@ define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_acc_zext(<2 x i8> %x, <2 x i8> %y, i6
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: adr r2, .LCPI53_0
; CHECK-NEXT: vldrw.u32 q2, [r2]
; CHECK-NEXT: vmov.i64 q2, #0xff
; CHECK-NEXT: vand q1, q1, q2
; CHECK-NEXT: vand q0, q0, q2
; CHECK-NEXT: vmov r2, s6
@ -1920,13 +1879,6 @@ define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_acc_zext(<2 x i8> %x, <2 x i8> %y, i6
; CHECK-NEXT: adds r0, r0, r2
; CHECK-NEXT: adcs r1, r3
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI53_0:
; CHECK-NEXT: .long 255 @ 0xff
; CHECK-NEXT: .long 0 @ 0x0
; CHECK-NEXT: .long 255 @ 0xff
; CHECK-NEXT: .long 0 @ 0x0
entry:
%xx = zext <2 x i8> %x to <2 x i64>
%yy = zext <2 x i8> %y to <2 x i64>

View File

@ -355,30 +355,13 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @mov_int64_ff() {
; CHECKLE-LABEL: mov_int64_ff:
; CHECKLE: @ %bb.0: @ %entry
; CHECKLE-NEXT: adr r0, .LCPI19_0
; CHECKLE-NEXT: vldrw.u32 q0, [r0]
; CHECKLE-NEXT: vmov.i64 q0, #0xff
; CHECKLE-NEXT: bx lr
; CHECKLE-NEXT: .p2align 4
; CHECKLE-NEXT: @ %bb.1:
; CHECKLE-NEXT: .LCPI19_0:
; CHECKLE-NEXT: .long 255 @ double 1.2598673968951787E-321
; CHECKLE-NEXT: .long 0
; CHECKLE-NEXT: .long 255 @ double 1.2598673968951787E-321
; CHECKLE-NEXT: .long 0
;
; CHECKBE-LABEL: mov_int64_ff:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: adr r0, .LCPI19_0
; CHECKBE-NEXT: vldrb.u8 q1, [r0]
; CHECKBE-NEXT: vrev64.8 q0, q1
; CHECKBE-NEXT: vmov.i64 q0, #0xff00000000
; CHECKBE-NEXT: bx lr
; CHECKBE-NEXT: .p2align 4
; CHECKBE-NEXT: @ %bb.1:
; CHECKBE-NEXT: .LCPI19_0:
; CHECKBE-NEXT: .long 0 @ double 1.2598673968951787E-321
; CHECKBE-NEXT: .long 255
; CHECKBE-NEXT: .long 0 @ double 1.2598673968951787E-321
; CHECKBE-NEXT: .long 255
entry:
ret <2 x i64> < i64 255, i64 255 >
}
@ -401,30 +384,13 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @mov_int64_ff0000ff0000ffff() {
; CHECKLE-LABEL: mov_int64_ff0000ff0000ffff:
; CHECKLE: @ %bb.0: @ %entry
; CHECKLE-NEXT: adr r0, .LCPI21_0
; CHECKLE-NEXT: vldrw.u32 q0, [r0]
; CHECKLE-NEXT: vmov.i64 q0, #0xff0000ff0000ffff
; CHECKLE-NEXT: bx lr
; CHECKLE-NEXT: .p2align 4
; CHECKLE-NEXT: @ %bb.1:
; CHECKLE-NEXT: .LCPI21_0:
; CHECKLE-NEXT: .long 65535 @ double -5.4874582226568829E+303
; CHECKLE-NEXT: .long 4278190335
; CHECKLE-NEXT: .long 65535 @ double -5.4874582226568829E+303
; CHECKLE-NEXT: .long 4278190335
;
; CHECKBE-LABEL: mov_int64_ff0000ff0000ffff:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: adr r0, .LCPI21_0
; CHECKBE-NEXT: vldrb.u8 q1, [r0]
; CHECKBE-NEXT: vrev64.8 q0, q1
; CHECKBE-NEXT: vmov.i64 q0, #0xffffff0000ff
; CHECKBE-NEXT: bx lr
; CHECKBE-NEXT: .p2align 4
; CHECKBE-NEXT: @ %bb.1:
; CHECKBE-NEXT: .LCPI21_0:
; CHECKBE-NEXT: .long 4278190335 @ double -5.4874582226568829E+303
; CHECKBE-NEXT: .long 65535
; CHECKBE-NEXT: .long 4278190335 @ double -5.4874582226568829E+303
; CHECKBE-NEXT: .long 65535
entry:
ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 >
}
@ -463,30 +429,13 @@ entry:
define arm_aapcs_vfpcc <16 x i8> @mov_int64_0f000f0f() {
; CHECKLE-LABEL: mov_int64_0f000f0f:
; CHECKLE: @ %bb.0: @ %entry
; CHECKLE-NEXT: adr r0, .LCPI23_0
; CHECKLE-NEXT: vldrw.u32 q0, [r0]
; CHECKLE-NEXT: vmov.i64 q0, #0xff000000ff00ff
; CHECKLE-NEXT: bx lr
; CHECKLE-NEXT: .p2align 4
; CHECKLE-NEXT: @ %bb.1:
; CHECKLE-NEXT: .LCPI23_0:
; CHECKLE-NEXT: .long 16711935 @ double 7.0632744699731897E-304
; CHECKLE-NEXT: .long 16711680
; CHECKLE-NEXT: .long 16711935 @ double 7.0632744699731897E-304
; CHECKLE-NEXT: .long 16711680
;
; CHECKBE-LABEL: mov_int64_0f000f0f:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: adr r0, .LCPI23_0
; CHECKBE-NEXT: vldrb.u8 q1, [r0]
; CHECKBE-NEXT: vrev64.8 q0, q1
; CHECKBE-NEXT: vmov.i64 q0, #0xff00ff00ff00
; CHECKBE-NEXT: bx lr
; CHECKBE-NEXT: .p2align 4
; CHECKBE-NEXT: @ %bb.1:
; CHECKBE-NEXT: .LCPI23_0:
; CHECKBE-NEXT: .long 4278255360 @ double -5.8276674374138332E+303
; CHECKBE-NEXT: .long 65280
; CHECKBE-NEXT: .long 4278255360 @ double -5.8276674374138332E+303
; CHECKBE-NEXT: .long 65280
entry:
ret <16 x i8> <i8 -1, i8 0, i8 -1, i8 0, i8 0, i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 0, i8 0, i8 -1, i8 0>
}
@ -494,30 +443,13 @@ entry:
define arm_aapcs_vfpcc <8 x i16> @mov_int64_ff00ffff() {
; CHECKLE-LABEL: mov_int64_ff00ffff:
; CHECKLE: @ %bb.0: @ %entry
; CHECKLE-NEXT: adr r0, .LCPI24_0
; CHECKLE-NEXT: vldrw.u32 q0, [r0]
; CHECKLE-NEXT: vmov.i64 q0, #0xffffffff0000ffff
; CHECKLE-NEXT: bx lr
; CHECKLE-NEXT: .p2align 4
; CHECKLE-NEXT: @ %bb.1:
; CHECKLE-NEXT: .LCPI24_0:
; CHECKLE-NEXT: .long 65535 @ double NaN
; CHECKLE-NEXT: .long 4294967295
; CHECKLE-NEXT: .long 65535 @ double NaN
; CHECKLE-NEXT: .long 4294967295
;
; CHECKBE-LABEL: mov_int64_ff00ffff:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: adr r0, .LCPI24_0
; CHECKBE-NEXT: vldrb.u8 q1, [r0]
; CHECKBE-NEXT: vrev64.8 q0, q1
; CHECKBE-NEXT: vmov.i64 q0, #0xffffffffffff0000
; CHECKBE-NEXT: bx lr
; CHECKBE-NEXT: .p2align 4
; CHECKBE-NEXT: @ %bb.1:
; CHECKBE-NEXT: .LCPI24_0:
; CHECKBE-NEXT: .long 4294901760 @ double NaN
; CHECKBE-NEXT: .long 4294967295
; CHECKBE-NEXT: .long 4294901760 @ double NaN
; CHECKBE-NEXT: .long 4294967295
entry:
ret <8 x i16> <i16 -1, i16 0, i16 -1, i16 -1, i16 -1, i16 0, i16 -1, i16 -1>
}
@ -665,57 +597,18 @@ entry:
define arm_aapcs_vfpcc <16 x i8> @test(<16 x i8> %i) {
; CHECKLE-LABEL: test:
; CHECKLE: @ %bb.0: @ %entry
; CHECKLE-NEXT: adr r0, .LCPI31_0
; CHECKLE-NEXT: vldrw.u32 q1, [r0]
; CHECKLE-NEXT: vmov.i64 q1, #0xff000000ff00ff
; CHECKLE-NEXT: vorr q0, q0, q1
; CHECKLE-NEXT: bx lr
; CHECKLE-NEXT: .p2align 4
; CHECKLE-NEXT: @ %bb.1:
; CHECKLE-NEXT: .LCPI31_0:
; CHECKLE-NEXT: .byte 255 @ 0xff
; CHECKLE-NEXT: .byte 0 @ 0x0
; CHECKLE-NEXT: .byte 255 @ 0xff
; CHECKLE-NEXT: .byte 0 @ 0x0
; CHECKLE-NEXT: .byte 0 @ 0x0
; CHECKLE-NEXT: .byte 0 @ 0x0
; CHECKLE-NEXT: .byte 255 @ 0xff
; CHECKLE-NEXT: .byte 0 @ 0x0
; CHECKLE-NEXT: .byte 255 @ 0xff
; CHECKLE-NEXT: .byte 0 @ 0x0
; CHECKLE-NEXT: .byte 255 @ 0xff
; CHECKLE-NEXT: .byte 0 @ 0x0
; CHECKLE-NEXT: .byte 0 @ 0x0
; CHECKLE-NEXT: .byte 0 @ 0x0
; CHECKLE-NEXT: .byte 255 @ 0xff
; CHECKLE-NEXT: .byte 0 @ 0x0
;
; CHECKBE-LABEL: test:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: adr r0, .LCPI31_0
; CHECKBE-NEXT: vmov.i64 q1, #0xff00ff00ff0000
; CHECKBE-NEXT: vrev64.8 q2, q1
; CHECKBE-NEXT: vrev64.8 q1, q0
; CHECKBE-NEXT: vldrb.u8 q0, [r0]
; CHECKBE-NEXT: vorr q1, q1, q0
; CHECKBE-NEXT: vorr q1, q1, q2
; CHECKBE-NEXT: vrev64.8 q0, q1
; CHECKBE-NEXT: bx lr
; CHECKBE-NEXT: .p2align 4
; CHECKBE-NEXT: @ %bb.1:
; CHECKBE-NEXT: .LCPI31_0:
; CHECKBE-NEXT: .byte 255 @ 0xff
; CHECKBE-NEXT: .byte 0 @ 0x0
; CHECKBE-NEXT: .byte 255 @ 0xff
; CHECKBE-NEXT: .byte 0 @ 0x0
; CHECKBE-NEXT: .byte 0 @ 0x0
; CHECKBE-NEXT: .byte 0 @ 0x0
; CHECKBE-NEXT: .byte 255 @ 0xff
; CHECKBE-NEXT: .byte 0 @ 0x0
; CHECKBE-NEXT: .byte 255 @ 0xff
; CHECKBE-NEXT: .byte 0 @ 0x0
; CHECKBE-NEXT: .byte 255 @ 0xff
; CHECKBE-NEXT: .byte 0 @ 0x0
; CHECKBE-NEXT: .byte 0 @ 0x0
; CHECKBE-NEXT: .byte 0 @ 0x0
; CHECKBE-NEXT: .byte 255 @ 0xff
; CHECKBE-NEXT: .byte 0 @ 0x0
entry:
%o = or <16 x i8> %i, <i8 -1, i8 0, i8 -1, i8 0, i8 0, i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 0, i8 0, i8 -1, i8 0>
ret <16 x i8> %o
@ -724,41 +617,18 @@ entry:
define arm_aapcs_vfpcc <8 x i16> @test2(<8 x i16> %i) {
; CHECKLE-LABEL: test2:
; CHECKLE: @ %bb.0: @ %entry
; CHECKLE-NEXT: adr r0, .LCPI32_0
; CHECKLE-NEXT: vldrw.u32 q1, [r0]
; CHECKLE-NEXT: vmov.i64 q1, #0xffffffff0000ffff
; CHECKLE-NEXT: vorr q0, q0, q1
; CHECKLE-NEXT: bx lr
; CHECKLE-NEXT: .p2align 4
; CHECKLE-NEXT: @ %bb.1:
; CHECKLE-NEXT: .LCPI32_0:
; CHECKLE-NEXT: .short 65535 @ 0xffff
; CHECKLE-NEXT: .short 0 @ 0x0
; CHECKLE-NEXT: .short 65535 @ 0xffff
; CHECKLE-NEXT: .short 65535 @ 0xffff
; CHECKLE-NEXT: .short 65535 @ 0xffff
; CHECKLE-NEXT: .short 0 @ 0x0
; CHECKLE-NEXT: .short 65535 @ 0xffff
; CHECKLE-NEXT: .short 65535 @ 0xffff
;
; CHECKBE-LABEL: test2:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: adr r0, .LCPI32_0
; CHECKBE-NEXT: vmov.i64 q1, #0xffffffffffff
; CHECKBE-NEXT: vrev64.16 q2, q1
; CHECKBE-NEXT: vrev64.16 q1, q0
; CHECKBE-NEXT: vldrh.u16 q0, [r0]
; CHECKBE-NEXT: vorr q1, q1, q0
; CHECKBE-NEXT: vorr q1, q1, q2
; CHECKBE-NEXT: vrev64.16 q0, q1
; CHECKBE-NEXT: bx lr
; CHECKBE-NEXT: .p2align 4
; CHECKBE-NEXT: @ %bb.1:
; CHECKBE-NEXT: .LCPI32_0:
; CHECKBE-NEXT: .short 65535 @ 0xffff
; CHECKBE-NEXT: .short 0 @ 0x0
; CHECKBE-NEXT: .short 65535 @ 0xffff
; CHECKBE-NEXT: .short 65535 @ 0xffff
; CHECKBE-NEXT: .short 65535 @ 0xffff
; CHECKBE-NEXT: .short 0 @ 0x0
; CHECKBE-NEXT: .short 65535 @ 0xffff
; CHECKBE-NEXT: .short 65535 @ 0xffff
entry:
%o = or <8 x i16> %i, <i16 -1, i16 0, i16 -1, i16 -1, i16 -1, i16 0, i16 -1, i16 -1>
ret <8 x i16> %o