From cd58fb632533e9bb87d401b734fcfec62012276d Mon Sep 17 00:00:00 2001 From: John Brawn Date: Tue, 17 Mar 2020 17:58:04 +0000 Subject: [PATCH] [ARM] Avoid pointless vrev of element-wise vmov If an element-wise vmov immediate instruction is followed by a vrev whose width is greater than or equal to the vmov element width, then that vrev won't do anything. Add a DAG combine to convert bitcasts that would become such vrevs into vector_reg_casts instead. Differential Revision: https://reviews.llvm.org/D76514 --- llvm/lib/Target/ARM/ARMISelLowering.cpp | 32 +- llvm/test/CodeGen/ARM/vmov.ll | 1016 +++++++++++++------ llvm/test/CodeGen/Thumb2/mve-masked-load.ll | 6 +- llvm/test/CodeGen/Thumb2/mve-vmovimm.ll | 255 ++--- 4 files changed, 820 insertions(+), 489 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index a0f553b7eb18..bfe475723cae 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -943,6 +943,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::INTRINSIC_VOID); setTargetDAGCombine(ISD::VECREDUCE_ADD); setTargetDAGCombine(ISD::ADD); + setTargetDAGCombine(ISD::BITCAST); } if (!Subtarget->hasFP64()) { @@ -9223,9 +9224,10 @@ static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) { N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(), N->getExtensionType(), N->isExpandingLoad()); SDValue Combo = NewLoad; - if (!PassThru.isUndef() && - (PassThru.getOpcode() != ISD::BITCAST || - !isZeroVector(PassThru->getOperand(0)))) + bool PassThruIsCastZero = (PassThru.getOpcode() == ISD::BITCAST || + PassThru.getOpcode() == ARMISD::VECTOR_REG_CAST) && + isZeroVector(PassThru->getOperand(0)); + if (!PassThru.isUndef() && !PassThruIsCastZero) Combo = DAG.getNode(ISD::VSELECT, dl, VT, Mask, NewLoad, PassThru); return DAG.getMergeValues({Combo, NewLoad.getValue(1)}, dl); } @@ -15211,6 +15213,28 @@ 
ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { return Res; } +static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG) { + SDValue Src = N->getOperand(0); + + // We may have a bitcast of something that has already had this bitcast + // combine performed on it, so skip past any VECTOR_REG_CASTs. + while (Src.getOpcode() == ARMISD::VECTOR_REG_CAST) + Src = Src.getOperand(0); + + // Bitcast from element-wise VMOV or VMVN doesn't need VREV if the VREV that + // would be generated is at least the width of the element type. + EVT SrcVT = Src.getValueType(); + EVT DstVT = N->getValueType(0); + if ((Src.getOpcode() == ARMISD::VMOVIMM || + Src.getOpcode() == ARMISD::VMVNIMM || + Src.getOpcode() == ARMISD::VMOVFPIMM) && + SrcVT.getScalarSizeInBits() <= DstVT.getScalarSizeInBits() && + DAG.getDataLayout().isBigEndian()) + return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(N), DstVT, Src); + + return SDValue(); +} + SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { switch (N->getOpcode()) { @@ -15264,6 +15288,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, return PerformVLDCombine(N, DCI); case ARMISD::BUILD_VECTOR: return PerformARMBUILD_VECTORCombine(N, DCI); + case ISD::BITCAST: + return PerformBITCASTCombine(N, DCI.DAG); case ARMISD::PREDICATE_CAST: return PerformPREDICATE_CASTCombine(N, DCI); case ARMISD::VECTOR_REG_CAST: diff --git a/llvm/test/CodeGen/ARM/vmov.ll b/llvm/test/CodeGen/ARM/vmov.ll index 0341448f9a77..751fd2ff557a 100644 --- a/llvm/test/CodeGen/ARM/vmov.ll +++ b/llvm/test/CodeGen/ARM/vmov.ll @@ -1,242 +1,140 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck --check-prefixes=CHECK,CHECK-LE %s -; RUN: llc -mtriple=armeb-eabi -mattr=+neon %s -o - | FileCheck --check-prefixes=CHECK,CHECK-BE %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon,+fullfp16 %s -o - | FileCheck 
--check-prefixes=CHECK,CHECK-LE %s +; RUN: llc -mtriple=armeb-eabi -mattr=+neon,+fullfp16 %s -o - | FileCheck --check-prefixes=CHECK,CHECK-BE %s define arm_aapcs_vfpcc <8 x i8> @v_movi8() nounwind { -; CHECK-LE-LABEL: v_movi8: -; CHECK-LE: @ %bb.0: -; CHECK-LE-NEXT: vmov.i8 d0, #0x8 -; CHECK-LE-NEXT: mov pc, lr -; -; CHECK-BE-LABEL: v_movi8: -; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: vmov.i8 d16, #0x8 -; CHECK-BE-NEXT: vrev64.8 d0, d16 -; CHECK-BE-NEXT: mov pc, lr +; CHECK-LABEL: v_movi8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i8 d0, #0x8 +; CHECK-NEXT: mov pc, lr ret <8 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > } define arm_aapcs_vfpcc <4 x i16> @v_movi16a() nounwind { -; CHECK-LE-LABEL: v_movi16a: -; CHECK-LE: @ %bb.0: -; CHECK-LE-NEXT: vmov.i16 d0, #0x10 -; CHECK-LE-NEXT: mov pc, lr -; -; CHECK-BE-LABEL: v_movi16a: -; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: vmov.i16 d16, #0x10 -; CHECK-BE-NEXT: vrev64.16 d0, d16 -; CHECK-BE-NEXT: mov pc, lr +; CHECK-LABEL: v_movi16a: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i16 d0, #0x10 +; CHECK-NEXT: mov pc, lr ret <4 x i16> < i16 16, i16 16, i16 16, i16 16 > } define arm_aapcs_vfpcc <4 x i16> @v_movi16b() nounwind { -; CHECK-LE-LABEL: v_movi16b: -; CHECK-LE: @ %bb.0: -; CHECK-LE-NEXT: vmov.i16 d0, #0x1000 -; CHECK-LE-NEXT: mov pc, lr -; -; CHECK-BE-LABEL: v_movi16b: -; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: vmov.i16 d16, #0x1000 -; CHECK-BE-NEXT: vrev64.16 d0, d16 -; CHECK-BE-NEXT: mov pc, lr +; CHECK-LABEL: v_movi16b: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i16 d0, #0x1000 +; CHECK-NEXT: mov pc, lr ret <4 x i16> < i16 4096, i16 4096, i16 4096, i16 4096 > } define arm_aapcs_vfpcc <4 x i16> @v_mvni16a() nounwind { -; CHECK-LE-LABEL: v_mvni16a: -; CHECK-LE: @ %bb.0: -; CHECK-LE-NEXT: vmvn.i16 d0, #0x10 -; CHECK-LE-NEXT: mov pc, lr -; -; CHECK-BE-LABEL: v_mvni16a: -; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: vmvn.i16 d16, #0x10 -; CHECK-BE-NEXT: vrev64.16 d0, d16 -; CHECK-BE-NEXT: mov pc, lr +; CHECK-LABEL: v_mvni16a: +; CHECK: @ 
%bb.0: +; CHECK-NEXT: vmvn.i16 d0, #0x10 +; CHECK-NEXT: mov pc, lr ret <4 x i16> < i16 65519, i16 65519, i16 65519, i16 65519 > } define arm_aapcs_vfpcc <4 x i16> @v_mvni16b() nounwind { -; CHECK-LE-LABEL: v_mvni16b: -; CHECK-LE: @ %bb.0: -; CHECK-LE-NEXT: vmvn.i16 d0, #0x1000 -; CHECK-LE-NEXT: mov pc, lr -; -; CHECK-BE-LABEL: v_mvni16b: -; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: vmvn.i16 d16, #0x1000 -; CHECK-BE-NEXT: vrev64.16 d0, d16 -; CHECK-BE-NEXT: mov pc, lr +; CHECK-LABEL: v_mvni16b: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmvn.i16 d0, #0x1000 +; CHECK-NEXT: mov pc, lr ret <4 x i16> < i16 61439, i16 61439, i16 61439, i16 61439 > } define arm_aapcs_vfpcc <2 x i32> @v_movi32a() nounwind { -; CHECK-LE-LABEL: v_movi32a: -; CHECK-LE: @ %bb.0: -; CHECK-LE-NEXT: vmov.i32 d0, #0x20 -; CHECK-LE-NEXT: mov pc, lr -; -; CHECK-BE-LABEL: v_movi32a: -; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: vmov.i32 d16, #0x20 -; CHECK-BE-NEXT: vrev64.32 d0, d16 -; CHECK-BE-NEXT: mov pc, lr +; CHECK-LABEL: v_movi32a: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 d0, #0x20 +; CHECK-NEXT: mov pc, lr ret <2 x i32> < i32 32, i32 32 > } define arm_aapcs_vfpcc <2 x i32> @v_movi32b() nounwind { -; CHECK-LE-LABEL: v_movi32b: -; CHECK-LE: @ %bb.0: -; CHECK-LE-NEXT: vmov.i32 d0, #0x2000 -; CHECK-LE-NEXT: mov pc, lr -; -; CHECK-BE-LABEL: v_movi32b: -; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: vmov.i32 d16, #0x2000 -; CHECK-BE-NEXT: vrev64.32 d0, d16 -; CHECK-BE-NEXT: mov pc, lr +; CHECK-LABEL: v_movi32b: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 d0, #0x2000 +; CHECK-NEXT: mov pc, lr ret <2 x i32> < i32 8192, i32 8192 > } define arm_aapcs_vfpcc <2 x i32> @v_movi32c() nounwind { -; CHECK-LE-LABEL: v_movi32c: -; CHECK-LE: @ %bb.0: -; CHECK-LE-NEXT: vmov.i32 d0, #0x200000 -; CHECK-LE-NEXT: mov pc, lr -; -; CHECK-BE-LABEL: v_movi32c: -; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: vmov.i32 d16, #0x200000 -; CHECK-BE-NEXT: vrev64.32 d0, d16 -; CHECK-BE-NEXT: mov pc, lr +; CHECK-LABEL: v_movi32c: +; CHECK: @ %bb.0: +; CHECK-NEXT: 
vmov.i32 d0, #0x200000 +; CHECK-NEXT: mov pc, lr ret <2 x i32> < i32 2097152, i32 2097152 > } define arm_aapcs_vfpcc <2 x i32> @v_movi32d() nounwind { -; CHECK-LE-LABEL: v_movi32d: -; CHECK-LE: @ %bb.0: -; CHECK-LE-NEXT: vmov.i32 d0, #0x20000000 -; CHECK-LE-NEXT: mov pc, lr -; -; CHECK-BE-LABEL: v_movi32d: -; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: vmov.i32 d16, #0x20000000 -; CHECK-BE-NEXT: vrev64.32 d0, d16 -; CHECK-BE-NEXT: mov pc, lr +; CHECK-LABEL: v_movi32d: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 d0, #0x20000000 +; CHECK-NEXT: mov pc, lr ret <2 x i32> < i32 536870912, i32 536870912 > } define arm_aapcs_vfpcc <2 x i32> @v_movi32e() nounwind { -; CHECK-LE-LABEL: v_movi32e: -; CHECK-LE: @ %bb.0: -; CHECK-LE-NEXT: vmov.i32 d0, #0x20ff -; CHECK-LE-NEXT: mov pc, lr -; -; CHECK-BE-LABEL: v_movi32e: -; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: vmov.i32 d16, #0x20ff -; CHECK-BE-NEXT: vrev64.32 d0, d16 -; CHECK-BE-NEXT: mov pc, lr +; CHECK-LABEL: v_movi32e: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 d0, #0x20ff +; CHECK-NEXT: mov pc, lr ret <2 x i32> < i32 8447, i32 8447 > } define arm_aapcs_vfpcc <2 x i32> @v_movi32f() nounwind { -; CHECK-LE-LABEL: v_movi32f: -; CHECK-LE: @ %bb.0: -; CHECK-LE-NEXT: vmov.i32 d0, #0x20ffff -; CHECK-LE-NEXT: mov pc, lr -; -; CHECK-BE-LABEL: v_movi32f: -; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: vmov.i32 d16, #0x20ffff -; CHECK-BE-NEXT: vrev64.32 d0, d16 -; CHECK-BE-NEXT: mov pc, lr +; CHECK-LABEL: v_movi32f: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 d0, #0x20ffff +; CHECK-NEXT: mov pc, lr ret <2 x i32> < i32 2162687, i32 2162687 > } define arm_aapcs_vfpcc <2 x i32> @v_mvni32a() nounwind { -; CHECK-LE-LABEL: v_mvni32a: -; CHECK-LE: @ %bb.0: -; CHECK-LE-NEXT: vmvn.i32 d0, #0x20 -; CHECK-LE-NEXT: mov pc, lr -; -; CHECK-BE-LABEL: v_mvni32a: -; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: vmvn.i32 d16, #0x20 -; CHECK-BE-NEXT: vrev64.32 d0, d16 -; CHECK-BE-NEXT: mov pc, lr +; CHECK-LABEL: v_mvni32a: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmvn.i32 d0, #0x20 +; 
CHECK-NEXT: mov pc, lr ret <2 x i32> < i32 4294967263, i32 4294967263 > } define arm_aapcs_vfpcc <2 x i32> @v_mvni32b() nounwind { -; CHECK-LE-LABEL: v_mvni32b: -; CHECK-LE: @ %bb.0: -; CHECK-LE-NEXT: vmvn.i32 d0, #0x2000 -; CHECK-LE-NEXT: mov pc, lr -; -; CHECK-BE-LABEL: v_mvni32b: -; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: vmvn.i32 d16, #0x2000 -; CHECK-BE-NEXT: vrev64.32 d0, d16 -; CHECK-BE-NEXT: mov pc, lr +; CHECK-LABEL: v_mvni32b: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmvn.i32 d0, #0x2000 +; CHECK-NEXT: mov pc, lr ret <2 x i32> < i32 4294959103, i32 4294959103 > } define arm_aapcs_vfpcc <2 x i32> @v_mvni32c() nounwind { -; CHECK-LE-LABEL: v_mvni32c: -; CHECK-LE: @ %bb.0: -; CHECK-LE-NEXT: vmvn.i32 d0, #0x200000 -; CHECK-LE-NEXT: mov pc, lr -; -; CHECK-BE-LABEL: v_mvni32c: -; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: vmvn.i32 d16, #0x200000 -; CHECK-BE-NEXT: vrev64.32 d0, d16 -; CHECK-BE-NEXT: mov pc, lr +; CHECK-LABEL: v_mvni32c: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmvn.i32 d0, #0x200000 +; CHECK-NEXT: mov pc, lr ret <2 x i32> < i32 4292870143, i32 4292870143 > } define arm_aapcs_vfpcc <2 x i32> @v_mvni32d() nounwind { -; CHECK-LE-LABEL: v_mvni32d: -; CHECK-LE: @ %bb.0: -; CHECK-LE-NEXT: vmvn.i32 d0, #0x20000000 -; CHECK-LE-NEXT: mov pc, lr -; -; CHECK-BE-LABEL: v_mvni32d: -; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: vmvn.i32 d16, #0x20000000 -; CHECK-BE-NEXT: vrev64.32 d0, d16 -; CHECK-BE-NEXT: mov pc, lr +; CHECK-LABEL: v_mvni32d: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmvn.i32 d0, #0x20000000 +; CHECK-NEXT: mov pc, lr ret <2 x i32> < i32 3758096383, i32 3758096383 > } define arm_aapcs_vfpcc <2 x i32> @v_mvni32e() nounwind { -; CHECK-LE-LABEL: v_mvni32e: -; CHECK-LE: @ %bb.0: -; CHECK-LE-NEXT: vmvn.i32 d0, #0x20ff -; CHECK-LE-NEXT: mov pc, lr -; -; CHECK-BE-LABEL: v_mvni32e: -; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: vmvn.i32 d16, #0x20ff -; CHECK-BE-NEXT: vrev64.32 d0, d16 -; CHECK-BE-NEXT: mov pc, lr +; CHECK-LABEL: v_mvni32e: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmvn.i32 d0, #0x20ff 
+; CHECK-NEXT: mov pc, lr ret <2 x i32> < i32 4294958848, i32 4294958848 > } define arm_aapcs_vfpcc <2 x i32> @v_mvni32f() nounwind { -; CHECK-LE-LABEL: v_mvni32f: -; CHECK-LE: @ %bb.0: -; CHECK-LE-NEXT: vmvn.i32 d0, #0x20ffff -; CHECK-LE-NEXT: mov pc, lr -; -; CHECK-BE-LABEL: v_mvni32f: -; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: vmvn.i32 d16, #0x20ffff -; CHECK-BE-NEXT: vrev64.32 d0, d16 -; CHECK-BE-NEXT: mov pc, lr +; CHECK-LABEL: v_mvni32f: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmvn.i32 d0, #0x20ffff +; CHECK-NEXT: mov pc, lr ret <2 x i32> < i32 4292804608, i32 4292804608 > } @@ -249,128 +147,74 @@ define arm_aapcs_vfpcc <1 x i64> @v_movi64() nounwind { } define arm_aapcs_vfpcc <16 x i8> @v_movQi8() nounwind { -; CHECK-LE-LABEL: v_movQi8: -; CHECK-LE: @ %bb.0: -; CHECK-LE-NEXT: vmov.i8 q0, #0x8 -; CHECK-LE-NEXT: mov pc, lr -; -; CHECK-BE-LABEL: v_movQi8: -; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: vmov.i8 q8, #0x8 -; CHECK-BE-NEXT: vrev64.8 q0, q8 -; CHECK-BE-NEXT: mov pc, lr +; CHECK-LABEL: v_movQi8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i8 q0, #0x8 +; CHECK-NEXT: mov pc, lr ret <16 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > } define arm_aapcs_vfpcc <8 x i16> @v_movQi16a() nounwind { -; CHECK-LE-LABEL: v_movQi16a: -; CHECK-LE: @ %bb.0: -; CHECK-LE-NEXT: vmov.i16 q0, #0x10 -; CHECK-LE-NEXT: mov pc, lr -; -; CHECK-BE-LABEL: v_movQi16a: -; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: vmov.i16 q8, #0x10 -; CHECK-BE-NEXT: vrev64.16 q0, q8 -; CHECK-BE-NEXT: mov pc, lr +; CHECK-LABEL: v_movQi16a: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i16 q0, #0x10 +; CHECK-NEXT: mov pc, lr ret <8 x i16> < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 > } define arm_aapcs_vfpcc <8 x i16> @v_movQi16b() nounwind { -; CHECK-LE-LABEL: v_movQi16b: -; CHECK-LE: @ %bb.0: -; CHECK-LE-NEXT: vmov.i16 q0, #0x1000 -; CHECK-LE-NEXT: mov pc, lr -; -; CHECK-BE-LABEL: v_movQi16b: -; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: vmov.i16 q8, #0x1000 
-; CHECK-BE-NEXT: vrev64.16 q0, q8 -; CHECK-BE-NEXT: mov pc, lr +; CHECK-LABEL: v_movQi16b: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i16 q0, #0x1000 +; CHECK-NEXT: mov pc, lr ret <8 x i16> < i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096 > } define arm_aapcs_vfpcc <4 x i32> @v_movQi32a() nounwind { -; CHECK-LE-LABEL: v_movQi32a: -; CHECK-LE: @ %bb.0: -; CHECK-LE-NEXT: vmov.i32 q0, #0x20 -; CHECK-LE-NEXT: mov pc, lr -; -; CHECK-BE-LABEL: v_movQi32a: -; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: vmov.i32 q8, #0x20 -; CHECK-BE-NEXT: vrev64.32 q0, q8 -; CHECK-BE-NEXT: mov pc, lr +; CHECK-LABEL: v_movQi32a: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 q0, #0x20 +; CHECK-NEXT: mov pc, lr ret <4 x i32> < i32 32, i32 32, i32 32, i32 32 > } define arm_aapcs_vfpcc <4 x i32> @v_movQi32b() nounwind { -; CHECK-LE-LABEL: v_movQi32b: -; CHECK-LE: @ %bb.0: -; CHECK-LE-NEXT: vmov.i32 q0, #0x2000 -; CHECK-LE-NEXT: mov pc, lr -; -; CHECK-BE-LABEL: v_movQi32b: -; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: vmov.i32 q8, #0x2000 -; CHECK-BE-NEXT: vrev64.32 q0, q8 -; CHECK-BE-NEXT: mov pc, lr +; CHECK-LABEL: v_movQi32b: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 q0, #0x2000 +; CHECK-NEXT: mov pc, lr ret <4 x i32> < i32 8192, i32 8192, i32 8192, i32 8192 > } define arm_aapcs_vfpcc <4 x i32> @v_movQi32c() nounwind { -; CHECK-LE-LABEL: v_movQi32c: -; CHECK-LE: @ %bb.0: -; CHECK-LE-NEXT: vmov.i32 q0, #0x200000 -; CHECK-LE-NEXT: mov pc, lr -; -; CHECK-BE-LABEL: v_movQi32c: -; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: vmov.i32 q8, #0x200000 -; CHECK-BE-NEXT: vrev64.32 q0, q8 -; CHECK-BE-NEXT: mov pc, lr +; CHECK-LABEL: v_movQi32c: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 q0, #0x200000 +; CHECK-NEXT: mov pc, lr ret <4 x i32> < i32 2097152, i32 2097152, i32 2097152, i32 2097152 > } define arm_aapcs_vfpcc <4 x i32> @v_movQi32d() nounwind { -; CHECK-LE-LABEL: v_movQi32d: -; CHECK-LE: @ %bb.0: -; CHECK-LE-NEXT: vmov.i32 q0, #0x20000000 -; CHECK-LE-NEXT: mov pc, lr -; -; CHECK-BE-LABEL: 
v_movQi32d: -; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: vmov.i32 q8, #0x20000000 -; CHECK-BE-NEXT: vrev64.32 q0, q8 -; CHECK-BE-NEXT: mov pc, lr +; CHECK-LABEL: v_movQi32d: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 q0, #0x20000000 +; CHECK-NEXT: mov pc, lr ret <4 x i32> < i32 536870912, i32 536870912, i32 536870912, i32 536870912 > } define arm_aapcs_vfpcc <4 x i32> @v_movQi32e() nounwind { -; CHECK-LE-LABEL: v_movQi32e: -; CHECK-LE: @ %bb.0: -; CHECK-LE-NEXT: vmov.i32 q0, #0x20ff -; CHECK-LE-NEXT: mov pc, lr -; -; CHECK-BE-LABEL: v_movQi32e: -; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: vmov.i32 q8, #0x20ff -; CHECK-BE-NEXT: vrev64.32 q0, q8 -; CHECK-BE-NEXT: mov pc, lr +; CHECK-LABEL: v_movQi32e: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 q0, #0x20ff +; CHECK-NEXT: mov pc, lr ret <4 x i32> < i32 8447, i32 8447, i32 8447, i32 8447 > } define arm_aapcs_vfpcc <4 x i32> @v_movQi32f() nounwind { -; CHECK-LE-LABEL: v_movQi32f: -; CHECK-LE: @ %bb.0: -; CHECK-LE-NEXT: vmov.i32 q0, #0x20ffff -; CHECK-LE-NEXT: mov pc, lr -; -; CHECK-BE-LABEL: v_movQi32f: -; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: vmov.i32 q8, #0x20ffff -; CHECK-BE-NEXT: vrev64.32 q0, q8 -; CHECK-BE-NEXT: mov pc, lr +; CHECK-LABEL: v_movQi32f: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 q0, #0x20ffff +; CHECK-NEXT: mov pc, lr ret <4 x i32> < i32 2162687, i32 2162687, i32 2162687, i32 2162687 > } @@ -390,18 +234,11 @@ define arm_aapcs_vfpcc <2 x i64> @v_movQi64() nounwind { ; Check for correct assembler printing for immediate values. 
%struct.int8x8_t = type { <8 x i8> } define arm_aapcs_vfpcc void @vdupn128(%struct.int8x8_t* noalias nocapture sret %agg.result) nounwind { -; CHECK-LE-LABEL: vdupn128: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: vmov.i8 d16, #0x80 -; CHECK-LE-NEXT: vstr d16, [r0] -; CHECK-LE-NEXT: mov pc, lr -; -; CHECK-BE-LABEL: vdupn128: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: vmov.i8 d16, #0x80 -; CHECK-BE-NEXT: vrev64.8 d16, d16 -; CHECK-BE-NEXT: vstr d16, [r0] -; CHECK-BE-NEXT: mov pc, lr +; CHECK-LABEL: vdupn128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i8 d16, #0x80 +; CHECK-NEXT: vstr d16, [r0] +; CHECK-NEXT: mov pc, lr entry: %0 = getelementptr inbounds %struct.int8x8_t, %struct.int8x8_t* %agg.result, i32 0, i32 0 ; <<8 x i8>*> [#uses=1] store <8 x i8> , <8 x i8>* %0, align 8 @@ -409,18 +246,11 @@ entry: } define arm_aapcs_vfpcc void @vdupnneg75(%struct.int8x8_t* noalias nocapture sret %agg.result) nounwind { -; CHECK-LE-LABEL: vdupnneg75: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: vmov.i8 d16, #0xb5 -; CHECK-LE-NEXT: vstr d16, [r0] -; CHECK-LE-NEXT: mov pc, lr -; -; CHECK-BE-LABEL: vdupnneg75: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: vmov.i8 d16, #0xb5 -; CHECK-BE-NEXT: vrev64.8 d16, d16 -; CHECK-BE-NEXT: vstr d16, [r0] -; CHECK-BE-NEXT: mov pc, lr +; CHECK-LABEL: vdupnneg75: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i8 d16, #0xb5 +; CHECK-NEXT: vstr d16, [r0] +; CHECK-NEXT: mov pc, lr entry: %0 = getelementptr inbounds %struct.int8x8_t, %struct.int8x8_t* %agg.result, i32 0, i32 0 ; <<8 x i8>*> [#uses=1] store <8 x i8> , <8 x i8>* %0, align 8 @@ -784,18 +614,11 @@ define arm_aapcs_vfpcc void @noTruncStore(<4 x i32>* %a, <4 x i16>* %b) nounwind ; Use vmov.f32 to materialize f32 immediate splats ; rdar://10437054 define arm_aapcs_vfpcc void @v_mov_v2f32(<2 x float>* nocapture %p) nounwind { -; CHECK-LE-LABEL: v_mov_v2f32: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: vmov.f32 d16, #-1.600000e+01 -; CHECK-LE-NEXT: vstr d16, [r0] 
-; CHECK-LE-NEXT: mov pc, lr -; -; CHECK-BE-LABEL: v_mov_v2f32: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: vmov.f32 d16, #-1.600000e+01 -; CHECK-BE-NEXT: vrev64.32 d16, d16 -; CHECK-BE-NEXT: vstr d16, [r0] -; CHECK-BE-NEXT: mov pc, lr +; CHECK-LABEL: v_mov_v2f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.f32 d16, #-1.600000e+01 +; CHECK-NEXT: vstr d16, [r0] +; CHECK-NEXT: mov pc, lr entry: store <2 x float> , <2 x float>* %p, align 4 ret void @@ -811,7 +634,6 @@ define arm_aapcs_vfpcc void @v_mov_v4f32(<4 x float>* nocapture %p) nounwind { ; CHECK-BE-LABEL: v_mov_v4f32: ; CHECK-BE: @ %bb.0: @ %entry ; CHECK-BE-NEXT: vmov.f32 q8, #3.100000e+01 -; CHECK-BE-NEXT: vrev64.32 q8, q8 ; CHECK-BE-NEXT: vstmia r0, {d16, d17} ; CHECK-BE-NEXT: mov pc, lr entry: @@ -861,7 +683,6 @@ define arm_aapcs_vfpcc void @any_extend(<4 x i1> %x, <4 x i32> %y) nounwind ssp ; CHECK-BE-NEXT: vmov.i16 d16, #0x1 ; CHECK-BE-NEXT: vrev64.32 d17, d0 ; CHECK-BE-NEXT: vrev64.32 q9, q1 -; CHECK-BE-NEXT: vrev32.16 d16, d16 ; CHECK-BE-NEXT: vand d16, d17, d16 ; CHECK-BE-NEXT: vrev32.16 d16, d16 ; CHECK-BE-NEXT: vmovl.u16 q8, d16 @@ -878,4 +699,599 @@ entry: unreachable } +define arm_aapcs_vfpcc void @v_movi8_sti8(i8* %p) { +; CHECK-LABEL: v_movi8_sti8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i8 d16, #0x1 +; CHECK-NEXT: vst1.8 {d16}, [r0] +; CHECK-NEXT: mov pc, lr + call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %p, <8 x i8> , i32 1) + ret void +} + +define arm_aapcs_vfpcc void @v_movi8_sti16(i8* %p) { +; CHECK-LABEL: v_movi8_sti16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i8 d16, #0x1 +; CHECK-NEXT: vst1.16 {d16}, [r0] +; CHECK-NEXT: mov pc, lr + %val = bitcast <8 x i8> to <4 x i16> + call void @llvm.arm.neon.vst1.p0i8.v4i16(i8* %p, <4 x i16> %val, i32 2) + ret void +} + +define arm_aapcs_vfpcc void @v_movi8_stf16(i8* %p) { +; CHECK-LABEL: v_movi8_stf16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i8 d16, #0x1 +; CHECK-NEXT: vst1.16 {d16}, [r0] +; CHECK-NEXT: mov pc, lr + %val = bitcast <8 x i8> to <4 
x half> + call void @llvm.arm.neon.vst1.p0i8.v4f16(i8* %p, <4 x half> %val, i32 2) + ret void +} + +define arm_aapcs_vfpcc void @v_movi8_sti32(i8* %p) { +; CHECK-LABEL: v_movi8_sti32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i8 d16, #0x1 +; CHECK-NEXT: vst1.32 {d16}, [r0] +; CHECK-NEXT: mov pc, lr + %val = bitcast <8 x i8> to <2 x i32> + call void @llvm.arm.neon.vst1.p0i8.v2i32(i8* %p, <2 x i32> %val, i32 4) + ret void +} + +define arm_aapcs_vfpcc void @v_movi8_stf32(i8* %p) { +; CHECK-LABEL: v_movi8_stf32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i8 d16, #0x1 +; CHECK-NEXT: vst1.32 {d16}, [r0] +; CHECK-NEXT: mov pc, lr + %val = bitcast <8 x i8> to <2 x float> + call void @llvm.arm.neon.vst1.p0i8.v2f32(i8* %p, <2 x float> %val, i32 4) + ret void +} + +define arm_aapcs_vfpcc void @v_movi8_sti64(i8* %p) { +; CHECK-LABEL: v_movi8_sti64: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i8 d16, #0x1 +; CHECK-NEXT: vst1.64 {d16}, [r0:64] +; CHECK-NEXT: mov pc, lr + %val = bitcast <8 x i8> to <1 x i64> + call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* %p, <1 x i64> %val, i32 8) + ret void +} + +define arm_aapcs_vfpcc void @v_movi16_sti16(i8* %p) { +; CHECK-LABEL: v_movi16_sti16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i16 d16, #0x1 +; CHECK-NEXT: vst1.16 {d16}, [r0] +; CHECK-NEXT: mov pc, lr + call void @llvm.arm.neon.vst1.p0i8.v4i16(i8* %p, <4 x i16> , i32 2) + ret void +} + +define arm_aapcs_vfpcc void @v_movi16_stf16(i8* %p) { +; CHECK-LABEL: v_movi16_stf16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i16 d16, #0x1 +; CHECK-NEXT: vst1.16 {d16}, [r0] +; CHECK-NEXT: mov pc, lr + %val = bitcast <4 x i16> to <4 x half> + call void @llvm.arm.neon.vst1.p0i8.v4f16(i8* %p, <4 x half> %val, i32 2) + ret void +} + +define arm_aapcs_vfpcc void @v_movi16_sti32(i8* %p) { +; CHECK-LABEL: v_movi16_sti32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i16 d16, #0x1 +; CHECK-NEXT: vst1.32 {d16}, [r0] +; CHECK-NEXT: mov pc, lr + %val = bitcast <4 x i16> to <2 x i32> + call void @llvm.arm.neon.vst1.p0i8.v2i32(i8* %p, 
<2 x i32> %val, i32 4) + ret void +} + +define arm_aapcs_vfpcc void @v_movi16_stf32(i8* %p) { +; CHECK-LABEL: v_movi16_stf32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i16 d16, #0x1 +; CHECK-NEXT: vst1.32 {d16}, [r0] +; CHECK-NEXT: mov pc, lr + %val = bitcast <4 x i16> to <2 x float> + call void @llvm.arm.neon.vst1.p0i8.v2f32(i8* %p, <2 x float> %val, i32 4) + ret void +} + +define arm_aapcs_vfpcc void @v_movi16_sti64(i8* %p) { +; CHECK-LABEL: v_movi16_sti64: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i16 d16, #0x1 +; CHECK-NEXT: vst1.64 {d16}, [r0:64] +; CHECK-NEXT: mov pc, lr + %val = bitcast <4 x i16> to <1 x i64> + call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* %p, <1 x i64> %val, i32 8) + ret void +} + +define arm_aapcs_vfpcc void @v_movi32_sti32(i8* %p) { +; CHECK-LABEL: v_movi32_sti32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 d16, #0x1 +; CHECK-NEXT: vst1.32 {d16}, [r0] +; CHECK-NEXT: mov pc, lr + call void @llvm.arm.neon.vst1.p0i8.v2i32(i8* %p, <2 x i32> , i32 4) + ret void +} + +define arm_aapcs_vfpcc void @v_movi32_stf32(i8* %p) { +; CHECK-LABEL: v_movi32_stf32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 d16, #0x1 +; CHECK-NEXT: vst1.32 {d16}, [r0] +; CHECK-NEXT: mov pc, lr + %val = bitcast <2 x i32> to <2 x float> + call void @llvm.arm.neon.vst1.p0i8.v2f32(i8* %p, <2 x float> %val, i32 4) + ret void +} + +define arm_aapcs_vfpcc void @v_movi32_sti64(i8* %p) { +; CHECK-LABEL: v_movi32_sti64: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 d16, #0x1 +; CHECK-NEXT: vst1.64 {d16}, [r0:64] +; CHECK-NEXT: mov pc, lr + %val = bitcast <2 x i32> to <1 x i64> + call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* %p, <1 x i64> %val, i32 8) + ret void +} + +define arm_aapcs_vfpcc void @v_movf32_stf32(i8* %p) { +; CHECK-LABEL: v_movf32_stf32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.f32 d16, #1.000000e+00 +; CHECK-NEXT: vst1.32 {d16}, [r0] +; CHECK-NEXT: mov pc, lr + call void @llvm.arm.neon.vst1.p0i8.v2f32(i8* %p, <2 x float> , i32 4) + ret void +} + +define arm_aapcs_vfpcc 
void@v_movf32_sti32(i8* %p) { +; FIXME: We should use vmov.f32 instead of mov then vdup +; CHECK-LABEL: v_movf32_sti32: +; CHECK: @ %bb.0: +; CHECK-NEXT: mov r1, #1065353216 +; CHECK-NEXT: vdup.32 d16, r1 +; CHECK-NEXT: vst1.32 {d16}, [r0] +; CHECK-NEXT: mov pc, lr + %val = bitcast <2 x float> to <2 x i32> + call void @llvm.arm.neon.vst1.p0i8.v2i32(i8* %p, <2 x i32> %val, i32 4) + ret void +} + +define arm_aapcs_vfpcc void @v_movf32_sti64(i8* %p) { +; CHECK-LE-LABEL: v_movf32_sti64: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: mov r1, #1065353216 +; CHECK-LE-NEXT: vdup.32 d16, r1 +; CHECK-LE-NEXT: vst1.64 {d16}, [r0:64] +; CHECK-LE-NEXT: mov pc, lr +; +; FIXME: vrev is not needed here +; CHECK-BE-LABEL: v_movf32_sti64: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: mov r1, #1065353216 +; CHECK-BE-NEXT: vdup.32 d16, r1 +; CHECK-BE-NEXT: vrev64.32 d16, d16 +; CHECK-BE-NEXT: vst1.64 {d16}, [r0:64] +; CHECK-BE-NEXT: mov pc, lr + %val = bitcast <2 x float> to <1 x i64> + call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* %p, <1 x i64> %val, i32 8) + ret void +} + +define arm_aapcs_vfpcc void @v_movi64_sti64(i8* %p) { +; CHECK-LABEL: v_movi64_sti64: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i64 d16, #0xff +; CHECK-NEXT: vst1.64 {d16}, [r0:64] +; CHECK-NEXT: mov pc, lr + call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* %p, <1 x i64> , i32 8) + ret void +} + +define arm_aapcs_vfpcc void @v_movQi8_sti8(i8* %p) { +; CHECK-LABEL: v_movQi8_sti8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i8 q8, #0x1 +; CHECK-NEXT: vst1.8 {d16, d17}, [r0] +; CHECK-NEXT: mov pc, lr + call void @llvm.arm.neon.vst1.p0i8.v16i8(i8* %p, <16 x i8> , i32 1) + ret void +} + +define arm_aapcs_vfpcc void @v_movQi8_sti16(i8* %p) { +; CHECK-LABEL: v_movQi8_sti16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i8 q8, #0x1 +; CHECK-NEXT: vst1.16 {d16, d17}, [r0] +; CHECK-NEXT: mov pc, lr + %val = bitcast <16 x i8> to <8 x i16> + call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %p, <8 x i16> %val, i32 2) + ret void +} + +define arm_aapcs_vfpcc void 
@v_movQi8_stf16(i8* %p) { +; CHECK-LABEL: v_movQi8_stf16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i8 q8, #0x1 +; CHECK-NEXT: vst1.16 {d16, d17}, [r0] +; CHECK-NEXT: mov pc, lr + %val = bitcast <16 x i8> to <8 x half> + call void @llvm.arm.neon.vst1.p0i8.v8f16(i8* %p, <8 x half> %val, i32 2) + ret void +} + +define arm_aapcs_vfpcc void @v_movQi8_sti32(i8* %p) { +; CHECK-LABEL: v_movQi8_sti32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i8 q8, #0x1 +; CHECK-NEXT: vst1.32 {d16, d17}, [r0] +; CHECK-NEXT: mov pc, lr + %val = bitcast <16 x i8> to <4 x i32> + call void @llvm.arm.neon.vst1.p0i8.v4i32(i8* %p, <4 x i32> %val, i32 4) + ret void +} + +define arm_aapcs_vfpcc void @v_movQi8_stf32(i8* %p) { +; CHECK-LABEL: v_movQi8_stf32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i8 q8, #0x1 +; CHECK-NEXT: vst1.32 {d16, d17}, [r0] +; CHECK-NEXT: mov pc, lr + %val = bitcast <16 x i8> to <4 x float> + call void @llvm.arm.neon.vst1.p0i8.v4f32(i8* %p, <4 x float> %val, i32 4) + ret void +} + +define arm_aapcs_vfpcc void @v_movQi8_sti64(i8* %p) { +; CHECK-LABEL: v_movQi8_sti64: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i8 q8, #0x1 +; CHECK-NEXT: vst1.64 {d16, d17}, [r0:64] +; CHECK-NEXT: mov pc, lr + %val = bitcast <16 x i8> to <2 x i64> + call void @llvm.arm.neon.vst1.p0i8.v2i64(i8* %p, <2 x i64> %val, i32 8) + ret void +} + +define arm_aapcs_vfpcc void @v_movQi16_sti16(i8* %p) { +; CHECK-LABEL: v_movQi16_sti16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i16 q8, #0x1 +; CHECK-NEXT: vst1.16 {d16, d17}, [r0] +; CHECK-NEXT: mov pc, lr + call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %p, <8 x i16> , i32 2) + ret void +} + +define arm_aapcs_vfpcc void @v_movQi16_stf16(i8* %p) { +; CHECK-LABEL: v_movQi16_stf16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i16 q8, #0x1 +; CHECK-NEXT: vst1.16 {d16, d17}, [r0] +; CHECK-NEXT: mov pc, lr + %val = bitcast <8 x i16> to <8 x half> + call void @llvm.arm.neon.vst1.p0i8.v8f16(i8* %p, <8 x half> %val, i32 2) + ret void +} + +define arm_aapcs_vfpcc void @v_movQi16_sti32(i8* 
%p) { +; CHECK-LABEL: v_movQi16_sti32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i16 q8, #0x1 +; CHECK-NEXT: vst1.32 {d16, d17}, [r0] +; CHECK-NEXT: mov pc, lr + %val = bitcast <8 x i16> to <4 x i32> + call void @llvm.arm.neon.vst1.p0i8.v4i32(i8* %p, <4 x i32> %val, i32 4) + ret void +} + +define arm_aapcs_vfpcc void @v_movQi16_stf32(i8* %p) { +; CHECK-LABEL: v_movQi16_stf32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i16 q8, #0x1 +; CHECK-NEXT: vst1.32 {d16, d17}, [r0] +; CHECK-NEXT: mov pc, lr + %val = bitcast <8 x i16> to <4 x float> + call void @llvm.arm.neon.vst1.p0i8.v4f32(i8* %p, <4 x float> %val, i32 4) + ret void +} + +define arm_aapcs_vfpcc void @v_movQi16_sti64(i8* %p) { +; CHECK-LABEL: v_movQi16_sti64: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i16 q8, #0x1 +; CHECK-NEXT: vst1.64 {d16, d17}, [r0:64] +; CHECK-NEXT: mov pc, lr + %val = bitcast <8 x i16> to <2 x i64> + call void @llvm.arm.neon.vst1.p0i8.v2i64(i8* %p, <2 x i64> %val, i32 8) + ret void +} + +define arm_aapcs_vfpcc void @v_movQi32_sti32(i8* %p) { +; CHECK-LABEL: v_movQi32_sti32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 q8, #0x1 +; CHECK-NEXT: vst1.32 {d16, d17}, [r0] +; CHECK-NEXT: mov pc, lr + call void @llvm.arm.neon.vst1.p0i8.v4i32(i8* %p, <4 x i32> , i32 4) + ret void +} + +define arm_aapcs_vfpcc void @v_movQi32_stf32(i8* %p) { +; CHECK-LABEL: v_movQi32_stf32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 q8, #0x1 +; CHECK-NEXT: vst1.32 {d16, d17}, [r0] +; CHECK-NEXT: mov pc, lr + %val = bitcast <4 x i32> to <4 x float> + call void @llvm.arm.neon.vst1.p0i8.v4f32(i8* %p, <4 x float> %val, i32 4) + ret void +} + +define arm_aapcs_vfpcc void @v_movQi32_sti64(i8* %p) { +; CHECK-LABEL: v_movQi32_sti64: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 q8, #0x1 +; CHECK-NEXT: vst1.64 {d16, d17}, [r0:64] +; CHECK-NEXT: mov pc, lr + %val = bitcast <4 x i32> to <2 x i64> + call void @llvm.arm.neon.vst1.p0i8.v2i64(i8* %p, <2 x i64> %val, i32 8) + ret void +} + +define arm_aapcs_vfpcc void @v_movQf32_stf32(i8* %p) { 
+; CHECK-LABEL: v_movQf32_stf32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.f32 q8, #1.000000e+00 +; CHECK-NEXT: vst1.32 {d16, d17}, [r0] +; CHECK-NEXT: mov pc, lr + call void @llvm.arm.neon.vst1.p0i8.v4f32(i8* %p, <4 x float> , i32 4) + ret void +} + +define arm_aapcs_vfpcc void @v_movQf32_sti32(i8* %p) { +; FIXME: We should use vmov.f32 instead of mov then vdup +; CHECK-LABEL: v_movQf32_sti32: +; CHECK: @ %bb.0: +; CHECK-NEXT: mov r1, #1065353216 +; CHECK-NEXT: vdup.32 q8, r1 +; CHECK-NEXT: vst1.32 {d16, d17}, [r0] +; CHECK-NEXT: mov pc, lr + %val = bitcast <4 x float> to <4 x i32> + call void @llvm.arm.neon.vst1.p0i8.v4i32(i8* %p, <4 x i32> %val, i32 4) + ret void +} + +define arm_aapcs_vfpcc void @v_movQf32_sti64(i8* %p) { +; CHECK-LE-LABEL: v_movQf32_sti64: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: mov r1, #1065353216 +; CHECK-LE-NEXT: vdup.32 q8, r1 +; CHECK-LE-NEXT: vst1.64 {d16, d17}, [r0:64] +; CHECK-LE-NEXT: mov pc, lr +; +; FIXME: vrev is not needed here +; CHECK-BE-LABEL: v_movQf32_sti64: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: mov r1, #1065353216 +; CHECK-BE-NEXT: vdup.32 q8, r1 +; CHECK-BE-NEXT: vrev64.32 q8, q8 +; CHECK-BE-NEXT: vst1.64 {d16, d17}, [r0:64] +; CHECK-BE-NEXT: mov pc, lr + %val = bitcast <4 x float> to <2 x i64> + call void @llvm.arm.neon.vst1.p0i8.v2i64(i8* %p, <2 x i64> %val, i32 8) + ret void +} + +define arm_aapcs_vfpcc void @v_movQi64_sti64(i8* %p) { +; CHECK-LABEL: v_movQi64_sti64: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i64 q8, #0xff +; CHECK-NEXT: vst1.64 {d16, d17}, [r0:64] +; CHECK-NEXT: mov pc, lr + call void @llvm.arm.neon.vst1.p0i8.v2i64(i8* %p, <2 x i64> , i32 8) + ret void +} + +define arm_aapcs_vfpcc void @v_mvni16_sti16(i8* %p) { +; CHECK-LABEL: v_mvni16_sti16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmvn.i16 d16, #0xfe +; CHECK-NEXT: vst1.16 {d16}, [r0] +; CHECK-NEXT: mov pc, lr + call void @llvm.arm.neon.vst1.p0i8.v4i16(i8* %p, <4 x i16> , i32 2) + ret void +} + +define arm_aapcs_vfpcc void @v_mvni16_stf16(i8* %p) { +; 
CHECK-LABEL: v_mvni16_stf16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmvn.i16 d16, #0xfe +; CHECK-NEXT: vst1.16 {d16}, [r0] +; CHECK-NEXT: mov pc, lr + %val = bitcast <4 x i16> to <4 x half> + call void @llvm.arm.neon.vst1.p0i8.v4f16(i8* %p, <4 x half> %val, i32 2) + ret void +} + +define arm_aapcs_vfpcc void @v_mvni16_sti32(i8* %p) { +; CHECK-LABEL: v_mvni16_sti32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmvn.i16 d16, #0xfe +; CHECK-NEXT: vst1.32 {d16}, [r0] +; CHECK-NEXT: mov pc, lr + %val = bitcast <4 x i16> to <2 x i32> + call void @llvm.arm.neon.vst1.p0i8.v2i32(i8* %p, <2 x i32> %val, i32 4) + ret void +} + +define arm_aapcs_vfpcc void @v_mvni16_stf32(i8* %p) { +; CHECK-LABEL: v_mvni16_stf32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmvn.i16 d16, #0xfe +; CHECK-NEXT: vst1.32 {d16}, [r0] +; CHECK-NEXT: mov pc, lr + %val = bitcast <4 x i16> to <2 x float> + call void @llvm.arm.neon.vst1.p0i8.v2f32(i8* %p, <2 x float> %val, i32 4) + ret void +} + +define arm_aapcs_vfpcc void @v_mvni16_sti64(i8* %p) { +; CHECK-LABEL: v_mvni16_sti64: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmvn.i16 d16, #0xfe +; CHECK-NEXT: vst1.64 {d16}, [r0:64] +; CHECK-NEXT: mov pc, lr + %val = bitcast <4 x i16> to <1 x i64> + call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* %p, <1 x i64> %val, i32 8) + ret void +} + +define arm_aapcs_vfpcc void @v_mvni32_sti32(i8* %p) { +; CHECK-LABEL: v_mvni32_sti32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmvn.i32 d16, #0xfe +; CHECK-NEXT: vst1.32 {d16}, [r0] +; CHECK-NEXT: mov pc, lr + call void @llvm.arm.neon.vst1.p0i8.v2i32(i8* %p, <2 x i32> , i32 4) + ret void +} + +define arm_aapcs_vfpcc void @v_mvni32_stf32(i8* %p) { +; CHECK-LABEL: v_mvni32_stf32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmvn.i32 d16, #0xfe +; CHECK-NEXT: vst1.32 {d16}, [r0] +; CHECK-NEXT: mov pc, lr + %val = bitcast <2 x i32> to <2 x float> + call void @llvm.arm.neon.vst1.p0i8.v2f32(i8* %p, <2 x float> %val, i32 4) + ret void +} + +define arm_aapcs_vfpcc void @v_mvni32_sti64(i8* %p) { +; CHECK-LABEL: v_mvni32_sti64: +; CHECK: 
@ %bb.0: +; CHECK-NEXT: vmvn.i32 d16, #0xfe +; CHECK-NEXT: vst1.64 {d16}, [r0:64] +; CHECK-NEXT: mov pc, lr + %val = bitcast <2 x i32> to <1 x i64> + call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* %p, <1 x i64> %val, i32 8) + ret void +} + + +define arm_aapcs_vfpcc void @v_mvnQi16_sti16(i8* %p) { +; CHECK-LABEL: v_mvnQi16_sti16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmvn.i16 q8, #0xfe +; CHECK-NEXT: vst1.16 {d16, d17}, [r0] +; CHECK-NEXT: mov pc, lr + call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %p, <8 x i16> , i32 2) + ret void +} + +define arm_aapcs_vfpcc void @v_mvnQi16_stf16(i8* %p) { +; CHECK-LABEL: v_mvnQi16_stf16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmvn.i16 q8, #0xfe +; CHECK-NEXT: vst1.16 {d16, d17}, [r0] +; CHECK-NEXT: mov pc, lr + %val = bitcast <8 x i16> to <8 x half> + call void @llvm.arm.neon.vst1.p0i8.v8f16(i8* %p, <8 x half> %val, i32 2) + ret void +} + +define arm_aapcs_vfpcc void @v_mvnQi16_sti32(i8* %p) { +; CHECK-LABEL: v_mvnQi16_sti32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmvn.i16 q8, #0xfe +; CHECK-NEXT: vst1.32 {d16, d17}, [r0] +; CHECK-NEXT: mov pc, lr + %val = bitcast <8 x i16> to <4 x i32> + call void @llvm.arm.neon.vst1.p0i8.v4i32(i8* %p, <4 x i32> %val, i32 4) + ret void +} + +define arm_aapcs_vfpcc void @v_mvnQi16_stf32(i8* %p) { +; CHECK-LABEL: v_mvnQi16_stf32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmvn.i16 q8, #0xfe +; CHECK-NEXT: vst1.32 {d16, d17}, [r0] +; CHECK-NEXT: mov pc, lr + %val = bitcast <8 x i16> to <4 x float> + call void @llvm.arm.neon.vst1.p0i8.v4f32(i8* %p, <4 x float> %val, i32 4) + ret void +} + +define arm_aapcs_vfpcc void @v_mvnQi16_sti64(i8* %p) { +; CHECK-LABEL: v_mvnQi16_sti64: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmvn.i16 q8, #0xfe +; CHECK-NEXT: vst1.64 {d16, d17}, [r0:64] +; CHECK-NEXT: mov pc, lr + %val = bitcast <8 x i16> to <2 x i64> + call void @llvm.arm.neon.vst1.p0i8.v2i64(i8* %p, <2 x i64> %val, i32 8) + ret void +} + +define arm_aapcs_vfpcc void @v_mvnQi32_sti32(i8* %p) { +; CHECK-LABEL: v_mvnQi32_sti32: +; CHECK: @ 
%bb.0: +; CHECK-NEXT: vmvn.i32 q8, #0xfe +; CHECK-NEXT: vst1.32 {d16, d17}, [r0] +; CHECK-NEXT: mov pc, lr + call void @llvm.arm.neon.vst1.p0i8.v4i32(i8* %p, <4 x i32> , i32 4) + ret void +} + +define arm_aapcs_vfpcc void @v_mvnQi32_stf32(i8* %p) { +; CHECK-LABEL: v_mvnQi32_stf32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmvn.i32 q8, #0xfe +; CHECK-NEXT: vst1.32 {d16, d17}, [r0] +; CHECK-NEXT: mov pc, lr + %val = bitcast <4 x i32> to <4 x float> + call void @llvm.arm.neon.vst1.p0i8.v4f32(i8* %p, <4 x float> %val, i32 4) + ret void +} + +define arm_aapcs_vfpcc void @v_mvnQi32_sti64(i8* %p) { +; CHECK-LABEL: v_mvnQi32_sti64: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmvn.i32 q8, #0xfe +; CHECK-NEXT: vst1.64 {d16, d17}, [r0:64] +; CHECK-NEXT: mov pc, lr + %val = bitcast <4 x i32> to <2 x i64> + call void @llvm.arm.neon.vst1.p0i8.v2i64(i8* %p, <2 x i64> %val, i32 8) + ret void +} + +declare void @llvm.arm.neon.vst1.p0i8.v8i8(i8*, <8 x i8>, i32) nounwind declare void @llvm.arm.neon.vst1.p0i8.v4i16(i8*, <4 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst1.p0i8.v4f16(i8*, <4 x half>, i32) nounwind +declare void @llvm.arm.neon.vst1.p0i8.v2i32(i8*, <2 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst1.p0i8.v2f32(i8*, <2 x float>, i32) nounwind +declare void @llvm.arm.neon.vst1.p0i8.v1i64(i8*, <1 x i64>, i32) nounwind + +declare void @llvm.arm.neon.vst1.p0i8.v16i8(i8*, <16 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst1.p0i8.v8i16(i8*, <8 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst1.p0i8.v8f16(i8*, <8 x half>, i32) nounwind +declare void @llvm.arm.neon.vst1.p0i8.v4i32(i8*, <4 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst1.p0i8.v4f32(i8*, <4 x float>, i32) nounwind +declare void @llvm.arm.neon.vst1.p0i8.v2i64(i8*, <2 x i64>, i32) nounwind diff --git a/llvm/test/CodeGen/Thumb2/mve-masked-load.ll b/llvm/test/CodeGen/Thumb2/mve-masked-load.ll index 5bddc5ed8333..ca51edb92a2a 100644 --- a/llvm/test/CodeGen/Thumb2/mve-masked-load.ll +++ 
b/llvm/test/CodeGen/Thumb2/mve-masked-load.ll @@ -1830,8 +1830,7 @@ define arm_aapcs_vfpcc <2 x i64> @masked_v2i64_align4_zero(<2 x i64> *%dest, <2 ; CHECK-BE-NEXT: vldr d0, [r0] ; CHECK-BE-NEXT: b .LBB49_3 ; CHECK-BE-NEXT: .LBB49_2: -; CHECK-BE-NEXT: vmov.i32 q1, #0x0 -; CHECK-BE-NEXT: vrev64.32 q0, q1 +; CHECK-BE-NEXT: vmov.i32 q0, #0x0 ; CHECK-BE-NEXT: .LBB49_3: @ %else ; CHECK-BE-NEXT: lsls r1, r1, #30 ; CHECK-BE-NEXT: it mi @@ -1924,8 +1923,7 @@ define arm_aapcs_vfpcc <2 x double> @masked_v2f64_align4_zero(<2 x double> *%des ; CHECK-BE-NEXT: vldr d0, [r0] ; CHECK-BE-NEXT: b .LBB50_3 ; CHECK-BE-NEXT: .LBB50_2: -; CHECK-BE-NEXT: vmov.i32 q1, #0x0 -; CHECK-BE-NEXT: vrev64.32 q0, q1 +; CHECK-BE-NEXT: vmov.i32 q0, #0x0 ; CHECK-BE-NEXT: .LBB50_3: @ %else ; CHECK-BE-NEXT: lsls r1, r1, #30 ; CHECK-BE-NEXT: it mi diff --git a/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll b/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll index 2173112d9640..77dd9c5df95b 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll @@ -4,91 +4,55 @@ ; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKBE define arm_aapcs_vfpcc <16 x i8> @mov_int8_1() { -; CHECKLE-LABEL: mov_int8_1: -; CHECKLE: @ %bb.0: @ %entry -; CHECKLE-NEXT: vmov.i8 q0, #0x1 -; CHECKLE-NEXT: bx lr -; -; CHECKBE-LABEL: mov_int8_1: -; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: vmov.i8 q1, #0x1 -; CHECKBE-NEXT: vrev64.8 q0, q1 -; CHECKBE-NEXT: bx lr +; CHECK-LABEL: mov_int8_1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i8 q0, #0x1 +; CHECK-NEXT: bx lr entry: ret <16 x i8> } define arm_aapcs_vfpcc <16 x i8> @mov_int8_m1() { -; CHECKLE-LABEL: mov_int8_m1: -; CHECKLE: @ %bb.0: @ %entry -; CHECKLE-NEXT: vmov.i8 q0, #0xff -; CHECKLE-NEXT: bx lr -; -; CHECKBE-LABEL: mov_int8_m1: -; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: vmov.i8 q1, #0xff -; CHECKBE-NEXT: vrev64.8 q0, q1 -; CHECKBE-NEXT: bx lr +; 
CHECK-LABEL: mov_int8_m1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i8 q0, #0xff +; CHECK-NEXT: bx lr entry: ret <16 x i8> } define arm_aapcs_vfpcc <8 x i16> @mov_int16_1() { -; CHECKLE-LABEL: mov_int16_1: -; CHECKLE: @ %bb.0: @ %entry -; CHECKLE-NEXT: vmov.i16 q0, #0x1 -; CHECKLE-NEXT: bx lr -; -; CHECKBE-LABEL: mov_int16_1: -; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: vmov.i16 q1, #0x1 -; CHECKBE-NEXT: vrev64.16 q0, q1 -; CHECKBE-NEXT: bx lr +; CHECK-LABEL: mov_int16_1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i16 q0, #0x1 +; CHECK-NEXT: bx lr entry: ret <8 x i16> } define arm_aapcs_vfpcc <8 x i16> @mov_int16_m1() { -; CHECKLE-LABEL: mov_int16_m1: -; CHECKLE: @ %bb.0: @ %entry -; CHECKLE-NEXT: vmov.i8 q0, #0xff -; CHECKLE-NEXT: bx lr -; -; CHECKBE-LABEL: mov_int16_m1: -; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: vmov.i8 q1, #0xff -; CHECKBE-NEXT: vrev64.8 q0, q1 -; CHECKBE-NEXT: bx lr +; CHECK-LABEL: mov_int16_m1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i8 q0, #0xff +; CHECK-NEXT: bx lr entry: ret <8 x i16> } define arm_aapcs_vfpcc <8 x i16> @mov_int16_256() { -; CHECKLE-LABEL: mov_int16_256: -; CHECKLE: @ %bb.0: @ %entry -; CHECKLE-NEXT: vmov.i16 q0, #0x100 -; CHECKLE-NEXT: bx lr -; -; CHECKBE-LABEL: mov_int16_256: -; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: vmov.i16 q1, #0x100 -; CHECKBE-NEXT: vrev64.16 q0, q1 -; CHECKBE-NEXT: bx lr +; CHECK-LABEL: mov_int16_256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i16 q0, #0x100 +; CHECK-NEXT: bx lr entry: ret <8 x i16> } define arm_aapcs_vfpcc <8 x i16> @mov_int16_257() { -; CHECKLE-LABEL: mov_int16_257: -; CHECKLE: @ %bb.0: @ %entry -; CHECKLE-NEXT: vmov.i8 q0, #0x1 -; CHECKLE-NEXT: bx lr -; -; CHECKBE-LABEL: mov_int16_257: -; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: vmov.i8 q1, #0x1 -; CHECKBE-NEXT: vrev64.8 q0, q1 -; CHECKBE-NEXT: bx lr +; CHECK-LABEL: mov_int16_257: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i8 q0, #0x1 +; CHECK-NEXT: bx lr entry: ret <8 x i16> } @@ -125,61 
+89,37 @@ entry: } define arm_aapcs_vfpcc <4 x i32> @mov_int32_1() { -; CHECKLE-LABEL: mov_int32_1: -; CHECKLE: @ %bb.0: @ %entry -; CHECKLE-NEXT: vmov.i32 q0, #0x1 -; CHECKLE-NEXT: bx lr -; -; CHECKBE-LABEL: mov_int32_1: -; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: vmov.i32 q1, #0x1 -; CHECKBE-NEXT: vrev64.32 q0, q1 -; CHECKBE-NEXT: bx lr +; CHECK-LABEL: mov_int32_1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i32 q0, #0x1 +; CHECK-NEXT: bx lr entry: ret <4 x i32> } define arm_aapcs_vfpcc <4 x i32> @mov_int32_256() { -; CHECKLE-LABEL: mov_int32_256: -; CHECKLE: @ %bb.0: @ %entry -; CHECKLE-NEXT: vmov.i32 q0, #0x100 -; CHECKLE-NEXT: bx lr -; -; CHECKBE-LABEL: mov_int32_256: -; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: vmov.i32 q1, #0x100 -; CHECKBE-NEXT: vrev64.32 q0, q1 -; CHECKBE-NEXT: bx lr +; CHECK-LABEL: mov_int32_256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i32 q0, #0x100 +; CHECK-NEXT: bx lr entry: ret <4 x i32> } define arm_aapcs_vfpcc <4 x i32> @mov_int32_65536() { -; CHECKLE-LABEL: mov_int32_65536: -; CHECKLE: @ %bb.0: @ %entry -; CHECKLE-NEXT: vmov.i32 q0, #0x10000 -; CHECKLE-NEXT: bx lr -; -; CHECKBE-LABEL: mov_int32_65536: -; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: vmov.i32 q1, #0x10000 -; CHECKBE-NEXT: vrev64.32 q0, q1 -; CHECKBE-NEXT: bx lr +; CHECK-LABEL: mov_int32_65536: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i32 q0, #0x10000 +; CHECK-NEXT: bx lr entry: ret <4 x i32> } define arm_aapcs_vfpcc <4 x i32> @mov_int32_16777216() { -; CHECKLE-LABEL: mov_int32_16777216: -; CHECKLE: @ %bb.0: @ %entry -; CHECKLE-NEXT: vmov.i32 q0, #0x1000000 -; CHECKLE-NEXT: bx lr -; -; CHECKBE-LABEL: mov_int32_16777216: -; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: vmov.i32 q1, #0x1000000 -; CHECKBE-NEXT: vrev64.32 q0, q1 -; CHECKBE-NEXT: bx lr +; CHECK-LABEL: mov_int32_16777216: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i32 q0, #0x1000000 +; CHECK-NEXT: bx lr entry: ret <4 x i32> } @@ -216,61 +156,37 @@ entry: } define arm_aapcs_vfpcc <4 
x i32> @mov_int32_17919() { -; CHECKLE-LABEL: mov_int32_17919: -; CHECKLE: @ %bb.0: @ %entry -; CHECKLE-NEXT: vmov.i32 q0, #0x45ff -; CHECKLE-NEXT: bx lr -; -; CHECKBE-LABEL: mov_int32_17919: -; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: vmov.i32 q1, #0x45ff -; CHECKBE-NEXT: vrev64.32 q0, q1 -; CHECKBE-NEXT: bx lr +; CHECK-LABEL: mov_int32_17919: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i32 q0, #0x45ff +; CHECK-NEXT: bx lr entry: ret <4 x i32> } define arm_aapcs_vfpcc <4 x i32> @mov_int32_4587519() { -; CHECKLE-LABEL: mov_int32_4587519: -; CHECKLE: @ %bb.0: @ %entry -; CHECKLE-NEXT: vmov.i32 q0, #0x45ffff -; CHECKLE-NEXT: bx lr -; -; CHECKBE-LABEL: mov_int32_4587519: -; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: vmov.i32 q1, #0x45ffff -; CHECKBE-NEXT: vrev64.32 q0, q1 -; CHECKBE-NEXT: bx lr +; CHECK-LABEL: mov_int32_4587519: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i32 q0, #0x45ffff +; CHECK-NEXT: bx lr entry: ret <4 x i32> } define arm_aapcs_vfpcc <4 x i32> @mov_int32_m1() { -; CHECKLE-LABEL: mov_int32_m1: -; CHECKLE: @ %bb.0: @ %entry -; CHECKLE-NEXT: vmov.i8 q0, #0xff -; CHECKLE-NEXT: bx lr -; -; CHECKBE-LABEL: mov_int32_m1: -; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: vmov.i8 q1, #0xff -; CHECKBE-NEXT: vrev64.8 q0, q1 -; CHECKBE-NEXT: bx lr +; CHECK-LABEL: mov_int32_m1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i8 q0, #0xff +; CHECK-NEXT: bx lr entry: ret <4 x i32> } define arm_aapcs_vfpcc <4 x i32> @mov_int32_4294901760() { -; CHECKLE-LABEL: mov_int32_4294901760: -; CHECKLE: @ %bb.0: @ %entry -; CHECKLE-NEXT: vmvn.i32 q0, #0xffff -; CHECKLE-NEXT: bx lr -; -; CHECKBE-LABEL: mov_int32_4294901760: -; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: vmvn.i32 q1, #0xffff -; CHECKBE-NEXT: vrev64.32 q0, q1 -; CHECKBE-NEXT: bx lr +; CHECK-LABEL: mov_int32_4294901760: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmvn.i32 q0, #0xffff +; CHECK-NEXT: bx lr entry: ret <4 x i32> } @@ -307,16 +223,10 @@ entry: } define arm_aapcs_vfpcc <4 x i32> 
@mov_int32_4278255615() { -; CHECKLE-LABEL: mov_int32_4278255615: -; CHECKLE: @ %bb.0: @ %entry -; CHECKLE-NEXT: vmvn.i32 q0, #0xff0000 -; CHECKLE-NEXT: bx lr -; -; CHECKBE-LABEL: mov_int32_4278255615: -; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: vmvn.i32 q1, #0xff0000 -; CHECKBE-NEXT: vrev64.32 q0, q1 -; CHECKBE-NEXT: bx lr +; CHECK-LABEL: mov_int32_4278255615: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmvn.i32 q0, #0xff0000 +; CHECK-NEXT: bx lr entry: ret <4 x i32> } @@ -367,16 +277,10 @@ entry: } define arm_aapcs_vfpcc <2 x i64> @mov_int64_m1() { -; CHECKLE-LABEL: mov_int64_m1: -; CHECKLE: @ %bb.0: @ %entry -; CHECKLE-NEXT: vmov.i8 q0, #0xff -; CHECKLE-NEXT: bx lr -; -; CHECKBE-LABEL: mov_int64_m1: -; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: vmov.i8 q1, #0xff -; CHECKBE-NEXT: vrev64.8 q0, q1 -; CHECKBE-NEXT: bx lr +; CHECK-LABEL: mov_int64_m1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i8 q0, #0xff +; CHECK-NEXT: bx lr entry: ret <2 x i64> < i64 -1, i64 -1 > } @@ -462,8 +366,7 @@ define arm_aapcs_vfpcc <16 x i8> @mov_int64_0f0f0f0f0f0f0f0f() { ; ; CHECKBE-LABEL: mov_int64_0f0f0f0f0f0f0f0f: ; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: vmov.i16 q1, #0xff00 -; CHECKBE-NEXT: vrev64.16 q0, q1 +; CHECKBE-NEXT: vmov.i16 q0, #0xff00 ; CHECKBE-NEXT: bx lr entry: ret <16 x i8> @@ -532,32 +435,20 @@ entry: } define arm_aapcs_vfpcc <8 x half> @mov_float16_1() { -; CHECKLE-LABEL: mov_float16_1: -; CHECKLE: @ %bb.0: @ %entry -; CHECKLE-NEXT: vmov.i16 q0, #0x3c00 -; CHECKLE-NEXT: bx lr -; -; CHECKBE-LABEL: mov_float16_1: -; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: vmov.i16 q1, #0x3c00 -; CHECKBE-NEXT: vrev64.16 q0, q1 -; CHECKBE-NEXT: bx lr +; CHECK-LABEL: mov_float16_1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i16 q0, #0x3c00 +; CHECK-NEXT: bx lr entry: ret <8 x half> } define arm_aapcs_vfpcc <8 x half> @mov_float16_m3() { -; CHECKLE-LABEL: mov_float16_m3: -; CHECKLE: @ %bb.0: @ %entry -; CHECKLE-NEXT: vmov.i16 q0, #0xc200 -; CHECKLE-NEXT: bx lr -; -; 
CHECKBE-LABEL: mov_float16_m3: -; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: vmov.i16 q1, #0xc200 -; CHECKBE-NEXT: vrev64.16 q0, q1 -; CHECKBE-NEXT: bx lr +; CHECK-LABEL: mov_float16_m3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i16 q0, #0xc200 +; CHECK-NEXT: bx lr entry: ret <8 x half>