diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 4799afbb74ad..ab7124183388 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -242,7 +242,8 @@ namespace { } bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded); - bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded); + bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded, + bool AssumeSingleUse = false); bool CombineToPreIndexedLoadStore(SDNode *N); bool CombineToPostIndexedLoadStore(SDNode *N); @@ -1064,11 +1065,12 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { /// Check the specified vector node value to see if it can be simplified or /// if things it uses can be simplified as it only uses some of the elements. /// If so, return true. -bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op, - const APInt &Demanded) { +bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded, + bool AssumeSingleUse) { TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); APInt KnownUndef, KnownZero; - if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO)) + if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO, + 0, AssumeSingleUse)) return false; // Revisit the node. @@ -15014,6 +15016,23 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { } } + // If only EXTRACT_VECTOR_ELT nodes use the source vector we can + // simplify it based on the (valid) extraction indices. 
+ if (llvm::all_of(InVec->uses(), [&](SDNode *Use) { + return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT && + Use->getOperand(0) == InVec && + isa<ConstantSDNode>(Use->getOperand(1)); + })) { + APInt DemandedElts = APInt::getNullValue(VT.getVectorNumElements()); + for (SDNode *Use : InVec->uses()) { + auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1)); + if (CstElt->getAPIntValue().ult(VT.getVectorNumElements())) + DemandedElts.setBit(CstElt->getZExtValue()); + } + if (SimplifyDemandedVectorElts(InVec, DemandedElts, true)) + return SDValue(N, 0); + } + bool BCNumEltsChanged = false; EVT ExtVT = VT.getVectorElementType(); EVT LVT = ExtVT; diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index e69afd8a36ae..302c7883f97b 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -3893,20 +3893,34 @@ static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes, return nullptr; } -// Convert the mask of the given VECTOR_SHUFFLE into a byte-level mask, +// Convert the mask of the given shuffle op into a byte-level mask, // as if it had type vNi8. 
-static void getVPermMask(ShuffleVectorSDNode *VSN, +static bool getVPermMask(SDValue ShuffleOp, SmallVectorImpl<int> &Bytes) { - EVT VT = VSN->getValueType(0); + EVT VT = ShuffleOp.getValueType(); unsigned NumElements = VT.getVectorNumElements(); unsigned BytesPerElement = VT.getVectorElementType().getStoreSize(); - Bytes.resize(NumElements * BytesPerElement, -1); - for (unsigned I = 0; I < NumElements; ++I) { - int Index = VSN->getMaskElt(I); - if (Index >= 0) + + if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) { + Bytes.resize(NumElements * BytesPerElement, -1); + for (unsigned I = 0; I < NumElements; ++I) { + int Index = VSN->getMaskElt(I); + if (Index >= 0) + for (unsigned J = 0; J < BytesPerElement; ++J) + Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J; + } + return true; + } + if (SystemZISD::SPLAT == ShuffleOp.getOpcode() && + isa<ConstantSDNode>(ShuffleOp.getOperand(1))) { + unsigned Index = ShuffleOp.getConstantOperandVal(1); + Bytes.resize(NumElements * BytesPerElement, -1); + for (unsigned I = 0; I < NumElements; ++I) for (unsigned J = 0; J < BytesPerElement; ++J) Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J; + return true; } + return false; } // Bytes is a VPERM-like permute vector, except that -1 is used for @@ -4075,7 +4089,8 @@ bool GeneralShuffle::add(SDValue Op, unsigned Elem) { // See whether the bytes we need come from a contiguous part of one // operand. SmallVector<int, SystemZ::VectorBytes> OpBytes; - getVPermMask(cast<ShuffleVectorSDNode>(Op), OpBytes); + if (!getVPermMask(Op, OpBytes)) + break; int NewByte; if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte)) break; @@ -5109,13 +5124,14 @@ SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT, if (Opcode == ISD::BITCAST) // Look through bitcasts. 
Op = Op.getOperand(0); - else if (Opcode == ISD::VECTOR_SHUFFLE && + else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) && canTreatAsByteVector(Op.getValueType())) { // Get a VPERM-like permute mask and see whether the bytes covered // by the extracted element are a contiguous sequence from one // source operand. SmallVector<int, SystemZ::VectorBytes> Bytes; - getVPermMask(cast<ShuffleVectorSDNode>(Op), Bytes); + if (!getVPermMask(Op, Bytes)) + break; int First; if (!getShuffleInput(Bytes, Index * BytesPerElement, BytesPerElement, First)) diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll index 0e5ac1e6addc..31199b47e20c 100644 --- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll @@ -480,38 +480,28 @@ bb7: ; preds = %bb4, %bb1 ; GCN-LABEL: {{^}}multi_same_block: -; GCN-DAG: v_mov_b32_e32 v[[VEC0_ELT0:[0-9]+]], 0x41880000 -; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41900000 -; GCN-DAG: v_mov_b32_e32 v[[VEC0_ELT2:[0-9]+]], 0x41980000 -; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a00000 -; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a80000 -; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41b00000 -; GCN-DAG: s_load_dword [[ARG:s[0-9]+]] -; IDXMODE-DAG: s_add_i32 [[ARG_ADD:s[0-9]+]], [[ARG]], -16 +; GCN: s_load_dword [[ARG:s[0-9]+]] -; MOVREL-DAG: s_add_i32 m0, [[ARG]], -16 -; MOVREL: v_movreld_b32_e32 v[[VEC0_ELT0]], 4.0 -; GCN-NOT: m0 +; MOVREL: v_mov_b32_e32 v{{[0-9]+}}, 0x41900000 +; MOVREL: v_mov_b32_e32 v{{[0-9]+}}, 0x41b0cccd +; MOVREL: s_waitcnt +; MOVREL: s_add_i32 m0, [[ARG]], -16 +; MOVREL: v_movreld_b32_e32 v{{[0-9]+}}, 4.0 +; MOVREL: v_movreld_b32_e32 v{{[0-9]+}}, -4.0 +; MOVREL: s_mov_b32 m0, -1 -; IDXMODE: s_set_gpr_idx_on [[ARG_ADD]], dst -; IDXMODE: v_mov_b32_e32 v[[VEC0_ELT0]], 4.0 + +; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, 0x41900000 +; IDXMODE: s_waitcnt +; IDXMODE: s_add_i32 [[ARG]], [[ARG]], -16 +; IDXMODE: s_set_gpr_idx_on [[ARG]], dst +; IDXMODE: v_mov_b32_e32 
v{{[0-9]+}}, 4.0 +; IDXMODE: s_set_gpr_idx_off +; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, 0x41b0cccd +; IDXMODE: s_set_gpr_idx_on [[ARG]], dst +; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, -4.0 ; IDXMODE: s_set_gpr_idx_off -; GCN: v_mov_b32_e32 v[[VEC0_ELT2]], 0x4188cccd -; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x4190cccd -; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x4198cccd -; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a0cccd -; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a8cccd -; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41b0cccd - -; MOVREL: v_movreld_b32_e32 v[[VEC0_ELT2]], -4.0 - -; IDXMODE: s_set_gpr_idx_on [[ARG_ADD]], dst -; IDXMODE: v_mov_b32_e32 v[[VEC0_ELT2]], -4.0 -; IDXMODE: s_set_gpr_idx_off - -; PREGFX9: s_mov_b32 m0, -1 -; GFX9-NOT: s_mov_b32 m0 ; GCN: ds_write_b32 ; GCN: ds_write_b32 ; GCN: s_endpgm diff --git a/llvm/test/CodeGen/ARM/func-argpassing-endian.ll b/llvm/test/CodeGen/ARM/func-argpassing-endian.ll index 6edc0284a842..d73aa8655a76 100644 --- a/llvm/test/CodeGen/ARM/func-argpassing-endian.ll +++ b/llvm/test/CodeGen/ARM/func-argpassing-endian.ll @@ -38,7 +38,6 @@ define void @arg_double( double %val ) { define void @arg_v4i32(<4 x i32> %vec ) { ; CHECK-LE-LABEL: arg_v4i32: ; CHECK-LE: @ %bb.0: -; CHECK-LE-NEXT: vmov d17, r2, r3 ; CHECK-LE-NEXT: vmov d16, r0, r1 ; CHECK-LE-NEXT: movw r0, :lower16:var32 ; CHECK-LE-NEXT: movt r0, :upper16:var32 @@ -47,7 +46,6 @@ define void @arg_v4i32(<4 x i32> %vec ) { ; ; CHECK-BE-LABEL: arg_v4i32: ; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: vmov d17, r3, r2 ; CHECK-BE-NEXT: vmov d16, r1, r0 ; CHECK-BE-NEXT: movw r0, :lower16:var32 ; CHECK-BE-NEXT: movt r0, :upper16:var32 diff --git a/llvm/test/CodeGen/Mips/cconv/vector.ll b/llvm/test/CodeGen/Mips/cconv/vector.ll index f7b19f0476fd..b580d2a338c9 100644 --- a/llvm/test/CodeGen/Mips/cconv/vector.ll +++ b/llvm/test/CodeGen/Mips/cconv/vector.ll @@ -89,61 +89,37 @@ define <2 x i8> @i8_2(<2 x i8> %a, <2 x i8> %b) { ; ; MIPS64R5EB-LABEL: i8_2: ; MIPS64R5EB: # %bb.0: -; MIPS64R5EB-NEXT: daddiu 
$sp, $sp, -64 -; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 64 -; MIPS64R5EB-NEXT: sd $4, 56($sp) -; MIPS64R5EB-NEXT: ldi.b $w0, 0 -; MIPS64R5EB-NEXT: lbu $1, 57($sp) -; MIPS64R5EB-NEXT: lbu $2, 56($sp) -; MIPS64R5EB-NEXT: move.v $w1, $w0 -; MIPS64R5EB-NEXT: insert.h $w1[0], $2 -; MIPS64R5EB-NEXT: insert.h $w1[1], $1 -; MIPS64R5EB-NEXT: lbu $1, 58($sp) -; MIPS64R5EB-NEXT: insert.h $w1[2], $1 -; MIPS64R5EB-NEXT: lbu $1, 59($sp) -; MIPS64R5EB-NEXT: insert.h $w1[3], $1 -; MIPS64R5EB-NEXT: lbu $1, 60($sp) -; MIPS64R5EB-NEXT: insert.h $w1[4], $1 -; MIPS64R5EB-NEXT: lbu $1, 61($sp) -; MIPS64R5EB-NEXT: insert.h $w1[5], $1 -; MIPS64R5EB-NEXT: lbu $1, 63($sp) -; MIPS64R5EB-NEXT: lbu $2, 62($sp) -; MIPS64R5EB-NEXT: insert.h $w1[6], $2 -; MIPS64R5EB-NEXT: insert.h $w1[7], $1 -; MIPS64R5EB-NEXT: copy_s.h $1, $w1[0] -; MIPS64R5EB-NEXT: copy_s.h $2, $w1[1] -; MIPS64R5EB-NEXT: sd $5, 48($sp) -; MIPS64R5EB-NEXT: lbu $3, 48($sp) -; MIPS64R5EB-NEXT: insert.h $w0[0], $3 -; MIPS64R5EB-NEXT: lbu $3, 49($sp) -; MIPS64R5EB-NEXT: insert.h $w0[1], $3 -; MIPS64R5EB-NEXT: lbu $3, 50($sp) -; MIPS64R5EB-NEXT: insert.h $w0[2], $3 -; MIPS64R5EB-NEXT: lbu $3, 51($sp) -; MIPS64R5EB-NEXT: insert.h $w0[3], $3 -; MIPS64R5EB-NEXT: lbu $3, 52($sp) -; MIPS64R5EB-NEXT: insert.h $w0[4], $3 -; MIPS64R5EB-NEXT: lbu $3, 53($sp) -; MIPS64R5EB-NEXT: insert.h $w0[5], $3 -; MIPS64R5EB-NEXT: lbu $3, 55($sp) -; MIPS64R5EB-NEXT: lbu $4, 54($sp) -; MIPS64R5EB-NEXT: insert.h $w0[6], $4 -; MIPS64R5EB-NEXT: insert.h $w0[7], $3 +; MIPS64R5EB-NEXT: daddiu $sp, $sp, -96 +; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 96 +; MIPS64R5EB-NEXT: sd $4, 88($sp) +; MIPS64R5EB-NEXT: lbu $1, 89($sp) +; MIPS64R5EB-NEXT: sh $1, 2($sp) +; MIPS64R5EB-NEXT: lbu $1, 88($sp) +; MIPS64R5EB-NEXT: sh $1, 0($sp) +; MIPS64R5EB-NEXT: ld.h $w0, 0($sp) +; MIPS64R5EB-NEXT: copy_s.h $1, $w0[0] +; MIPS64R5EB-NEXT: copy_s.h $2, $w0[1] +; MIPS64R5EB-NEXT: sd $5, 80($sp) +; MIPS64R5EB-NEXT: lbu $3, 81($sp) +; MIPS64R5EB-NEXT: sh $3, 18($sp) +; MIPS64R5EB-NEXT: lbu 
$3, 80($sp) +; MIPS64R5EB-NEXT: sh $3, 16($sp) +; MIPS64R5EB-NEXT: ld.h $w0, 16($sp) ; MIPS64R5EB-NEXT: copy_s.h $3, $w0[0] ; MIPS64R5EB-NEXT: copy_s.h $4, $w0[1] -; MIPS64R5EB-NEXT: sw $4, 28($sp) -; MIPS64R5EB-NEXT: sw $3, 20($sp) -; MIPS64R5EB-NEXT: sw $2, 12($sp) -; MIPS64R5EB-NEXT: sw $1, 4($sp) -; MIPS64R5EB-NEXT: ld.d $w0, 16($sp) -; MIPS64R5EB-NEXT: ld.d $w1, 0($sp) +; MIPS64R5EB-NEXT: sw $4, 60($sp) +; MIPS64R5EB-NEXT: sw $3, 52($sp) +; MIPS64R5EB-NEXT: sw $2, 44($sp) +; MIPS64R5EB-NEXT: sw $1, 36($sp) +; MIPS64R5EB-NEXT: ld.d $w0, 48($sp) +; MIPS64R5EB-NEXT: ld.d $w1, 32($sp) ; MIPS64R5EB-NEXT: addv.d $w0, $w1, $w0 ; MIPS64R5EB-NEXT: copy_s.d $1, $w0[0] ; MIPS64R5EB-NEXT: copy_s.d $2, $w0[1] -; MIPS64R5EB-NEXT: sb $2, 45($sp) -; MIPS64R5EB-NEXT: sb $1, 44($sp) -; MIPS64R5EB-NEXT: lh $2, 44($sp) -; MIPS64R5EB-NEXT: daddiu $sp, $sp, 64 +; MIPS64R5EB-NEXT: sb $2, 77($sp) +; MIPS64R5EB-NEXT: sb $1, 76($sp) +; MIPS64R5EB-NEXT: lh $2, 76($sp) +; MIPS64R5EB-NEXT: daddiu $sp, $sp, 96 ; MIPS64R5EB-NEXT: jr $ra ; MIPS64R5EB-NEXT: nop ; @@ -215,61 +191,37 @@ define <2 x i8> @i8_2(<2 x i8> %a, <2 x i8> %b) { ; ; MIPS64R5EL-LABEL: i8_2: ; MIPS64R5EL: # %bb.0: -; MIPS64R5EL-NEXT: daddiu $sp, $sp, -64 -; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 64 -; MIPS64R5EL-NEXT: sd $4, 56($sp) -; MIPS64R5EL-NEXT: ldi.b $w0, 0 -; MIPS64R5EL-NEXT: lbu $1, 57($sp) -; MIPS64R5EL-NEXT: lbu $2, 56($sp) -; MIPS64R5EL-NEXT: move.v $w1, $w0 -; MIPS64R5EL-NEXT: insert.h $w1[0], $2 -; MIPS64R5EL-NEXT: insert.h $w1[1], $1 -; MIPS64R5EL-NEXT: lbu $1, 58($sp) -; MIPS64R5EL-NEXT: insert.h $w1[2], $1 -; MIPS64R5EL-NEXT: lbu $1, 59($sp) -; MIPS64R5EL-NEXT: insert.h $w1[3], $1 -; MIPS64R5EL-NEXT: lbu $1, 60($sp) -; MIPS64R5EL-NEXT: insert.h $w1[4], $1 -; MIPS64R5EL-NEXT: lbu $1, 61($sp) -; MIPS64R5EL-NEXT: insert.h $w1[5], $1 -; MIPS64R5EL-NEXT: lbu $1, 63($sp) -; MIPS64R5EL-NEXT: lbu $2, 62($sp) -; MIPS64R5EL-NEXT: insert.h $w1[6], $2 -; MIPS64R5EL-NEXT: insert.h $w1[7], $1 -; MIPS64R5EL-NEXT: copy_s.h 
$1, $w1[0] -; MIPS64R5EL-NEXT: copy_s.h $2, $w1[1] -; MIPS64R5EL-NEXT: sd $5, 48($sp) -; MIPS64R5EL-NEXT: lbu $3, 48($sp) -; MIPS64R5EL-NEXT: insert.h $w0[0], $3 -; MIPS64R5EL-NEXT: lbu $3, 49($sp) -; MIPS64R5EL-NEXT: insert.h $w0[1], $3 -; MIPS64R5EL-NEXT: lbu $3, 50($sp) -; MIPS64R5EL-NEXT: insert.h $w0[2], $3 -; MIPS64R5EL-NEXT: lbu $3, 51($sp) -; MIPS64R5EL-NEXT: insert.h $w0[3], $3 -; MIPS64R5EL-NEXT: lbu $3, 52($sp) -; MIPS64R5EL-NEXT: insert.h $w0[4], $3 -; MIPS64R5EL-NEXT: lbu $3, 53($sp) -; MIPS64R5EL-NEXT: insert.h $w0[5], $3 -; MIPS64R5EL-NEXT: lbu $3, 55($sp) -; MIPS64R5EL-NEXT: lbu $4, 54($sp) -; MIPS64R5EL-NEXT: insert.h $w0[6], $4 -; MIPS64R5EL-NEXT: insert.h $w0[7], $3 +; MIPS64R5EL-NEXT: daddiu $sp, $sp, -96 +; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 96 +; MIPS64R5EL-NEXT: sd $4, 88($sp) +; MIPS64R5EL-NEXT: lbu $1, 89($sp) +; MIPS64R5EL-NEXT: sh $1, 2($sp) +; MIPS64R5EL-NEXT: lbu $1, 88($sp) +; MIPS64R5EL-NEXT: sh $1, 0($sp) +; MIPS64R5EL-NEXT: ld.h $w0, 0($sp) +; MIPS64R5EL-NEXT: copy_s.h $1, $w0[0] +; MIPS64R5EL-NEXT: copy_s.h $2, $w0[1] +; MIPS64R5EL-NEXT: sd $5, 80($sp) +; MIPS64R5EL-NEXT: lbu $3, 81($sp) +; MIPS64R5EL-NEXT: sh $3, 18($sp) +; MIPS64R5EL-NEXT: lbu $3, 80($sp) +; MIPS64R5EL-NEXT: sh $3, 16($sp) +; MIPS64R5EL-NEXT: ld.h $w0, 16($sp) ; MIPS64R5EL-NEXT: copy_s.h $3, $w0[0] ; MIPS64R5EL-NEXT: copy_s.h $4, $w0[1] -; MIPS64R5EL-NEXT: sw $4, 24($sp) -; MIPS64R5EL-NEXT: sw $3, 16($sp) -; MIPS64R5EL-NEXT: sw $2, 8($sp) -; MIPS64R5EL-NEXT: sw $1, 0($sp) -; MIPS64R5EL-NEXT: ld.d $w0, 16($sp) -; MIPS64R5EL-NEXT: ld.d $w1, 0($sp) +; MIPS64R5EL-NEXT: sw $4, 56($sp) +; MIPS64R5EL-NEXT: sw $3, 48($sp) +; MIPS64R5EL-NEXT: sw $2, 40($sp) +; MIPS64R5EL-NEXT: sw $1, 32($sp) +; MIPS64R5EL-NEXT: ld.d $w0, 48($sp) +; MIPS64R5EL-NEXT: ld.d $w1, 32($sp) ; MIPS64R5EL-NEXT: addv.d $w0, $w1, $w0 ; MIPS64R5EL-NEXT: copy_s.d $1, $w0[0] ; MIPS64R5EL-NEXT: copy_s.d $2, $w0[1] -; MIPS64R5EL-NEXT: sb $2, 45($sp) -; MIPS64R5EL-NEXT: sb $1, 44($sp) -; MIPS64R5EL-NEXT: 
lh $2, 44($sp) -; MIPS64R5EL-NEXT: daddiu $sp, $sp, 64 +; MIPS64R5EL-NEXT: sb $2, 77($sp) +; MIPS64R5EL-NEXT: sb $1, 76($sp) +; MIPS64R5EL-NEXT: lh $2, 76($sp) +; MIPS64R5EL-NEXT: daddiu $sp, $sp, 96 ; MIPS64R5EL-NEXT: jr $ra ; MIPS64R5EL-NEXT: nop %1 = add <2 x i8> %a, %b @@ -445,181 +397,97 @@ define <2 x i8> @i8x2_7(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d, <2 x ; ; MIPS64R5EB-LABEL: i8x2_7: ; MIPS64R5EB: # %bb.0: # %entry -; MIPS64R5EB-NEXT: daddiu $sp, $sp, -176 -; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 176 -; MIPS64R5EB-NEXT: sd $4, 168($sp) -; MIPS64R5EB-NEXT: ldi.b $w0, 0 -; MIPS64R5EB-NEXT: lbu $1, 169($sp) -; MIPS64R5EB-NEXT: lbu $2, 168($sp) -; MIPS64R5EB-NEXT: move.v $w1, $w0 -; MIPS64R5EB-NEXT: insert.h $w1[0], $2 -; MIPS64R5EB-NEXT: insert.h $w1[1], $1 -; MIPS64R5EB-NEXT: lbu $1, 170($sp) -; MIPS64R5EB-NEXT: insert.h $w1[2], $1 -; MIPS64R5EB-NEXT: lbu $1, 171($sp) -; MIPS64R5EB-NEXT: insert.h $w1[3], $1 -; MIPS64R5EB-NEXT: lbu $1, 172($sp) -; MIPS64R5EB-NEXT: insert.h $w1[4], $1 -; MIPS64R5EB-NEXT: lbu $1, 173($sp) -; MIPS64R5EB-NEXT: insert.h $w1[5], $1 -; MIPS64R5EB-NEXT: lbu $1, 175($sp) -; MIPS64R5EB-NEXT: lbu $2, 174($sp) -; MIPS64R5EB-NEXT: insert.h $w1[6], $2 -; MIPS64R5EB-NEXT: insert.h $w1[7], $1 -; MIPS64R5EB-NEXT: copy_s.h $1, $w1[0] -; MIPS64R5EB-NEXT: copy_s.h $2, $w1[1] -; MIPS64R5EB-NEXT: sd $5, 160($sp) -; MIPS64R5EB-NEXT: lbu $3, 161($sp) -; MIPS64R5EB-NEXT: lbu $4, 160($sp) -; MIPS64R5EB-NEXT: move.v $w1, $w0 -; MIPS64R5EB-NEXT: insert.h $w1[0], $4 -; MIPS64R5EB-NEXT: insert.h $w1[1], $3 -; MIPS64R5EB-NEXT: lbu $3, 162($sp) -; MIPS64R5EB-NEXT: insert.h $w1[2], $3 -; MIPS64R5EB-NEXT: lbu $3, 163($sp) -; MIPS64R5EB-NEXT: insert.h $w1[3], $3 -; MIPS64R5EB-NEXT: lbu $3, 164($sp) -; MIPS64R5EB-NEXT: insert.h $w1[4], $3 -; MIPS64R5EB-NEXT: lbu $3, 165($sp) -; MIPS64R5EB-NEXT: insert.h $w1[5], $3 -; MIPS64R5EB-NEXT: lbu $3, 167($sp) -; MIPS64R5EB-NEXT: lbu $4, 166($sp) -; MIPS64R5EB-NEXT: insert.h $w1[6], $4 -; MIPS64R5EB-NEXT: 
insert.h $w1[7], $3 -; MIPS64R5EB-NEXT: copy_s.h $3, $w1[0] -; MIPS64R5EB-NEXT: copy_s.h $4, $w1[1] -; MIPS64R5EB-NEXT: sw $4, 28($sp) -; MIPS64R5EB-NEXT: sw $3, 20($sp) -; MIPS64R5EB-NEXT: sw $2, 12($sp) -; MIPS64R5EB-NEXT: sw $1, 4($sp) -; MIPS64R5EB-NEXT: ld.d $w1, 16($sp) -; MIPS64R5EB-NEXT: ld.d $w2, 0($sp) -; MIPS64R5EB-NEXT: addv.d $w1, $w2, $w1 -; MIPS64R5EB-NEXT: sd $6, 152($sp) -; MIPS64R5EB-NEXT: lbu $1, 153($sp) -; MIPS64R5EB-NEXT: lbu $2, 152($sp) -; MIPS64R5EB-NEXT: move.v $w2, $w0 -; MIPS64R5EB-NEXT: insert.h $w2[0], $2 -; MIPS64R5EB-NEXT: insert.h $w2[1], $1 -; MIPS64R5EB-NEXT: lbu $1, 154($sp) -; MIPS64R5EB-NEXT: insert.h $w2[2], $1 -; MIPS64R5EB-NEXT: lbu $1, 155($sp) -; MIPS64R5EB-NEXT: insert.h $w2[3], $1 -; MIPS64R5EB-NEXT: lbu $1, 156($sp) -; MIPS64R5EB-NEXT: insert.h $w2[4], $1 -; MIPS64R5EB-NEXT: lbu $1, 157($sp) -; MIPS64R5EB-NEXT: insert.h $w2[5], $1 -; MIPS64R5EB-NEXT: lbu $1, 159($sp) -; MIPS64R5EB-NEXT: lbu $2, 158($sp) -; MIPS64R5EB-NEXT: insert.h $w2[6], $2 -; MIPS64R5EB-NEXT: insert.h $w2[7], $1 -; MIPS64R5EB-NEXT: copy_s.h $1, $w2[0] -; MIPS64R5EB-NEXT: copy_s.h $2, $w2[1] -; MIPS64R5EB-NEXT: sw $2, 44($sp) -; MIPS64R5EB-NEXT: sw $1, 36($sp) -; MIPS64R5EB-NEXT: ld.d $w2, 32($sp) -; MIPS64R5EB-NEXT: addv.d $w1, $w1, $w2 -; MIPS64R5EB-NEXT: sd $7, 144($sp) -; MIPS64R5EB-NEXT: lbu $1, 145($sp) -; MIPS64R5EB-NEXT: lbu $2, 144($sp) -; MIPS64R5EB-NEXT: move.v $w2, $w0 -; MIPS64R5EB-NEXT: insert.h $w2[0], $2 -; MIPS64R5EB-NEXT: insert.h $w2[1], $1 -; MIPS64R5EB-NEXT: lbu $1, 146($sp) -; MIPS64R5EB-NEXT: insert.h $w2[2], $1 -; MIPS64R5EB-NEXT: lbu $1, 147($sp) -; MIPS64R5EB-NEXT: insert.h $w2[3], $1 -; MIPS64R5EB-NEXT: lbu $1, 148($sp) -; MIPS64R5EB-NEXT: insert.h $w2[4], $1 -; MIPS64R5EB-NEXT: lbu $1, 149($sp) -; MIPS64R5EB-NEXT: insert.h $w2[5], $1 -; MIPS64R5EB-NEXT: lbu $1, 151($sp) -; MIPS64R5EB-NEXT: lbu $2, 150($sp) -; MIPS64R5EB-NEXT: insert.h $w2[6], $2 -; MIPS64R5EB-NEXT: insert.h $w2[7], $1 -; MIPS64R5EB-NEXT: copy_s.h $1, $w2[0] 
-; MIPS64R5EB-NEXT: copy_s.h $2, $w2[1] -; MIPS64R5EB-NEXT: sw $2, 60($sp) -; MIPS64R5EB-NEXT: sw $1, 52($sp) -; MIPS64R5EB-NEXT: ld.d $w2, 48($sp) -; MIPS64R5EB-NEXT: addv.d $w1, $w1, $w2 -; MIPS64R5EB-NEXT: sd $8, 136($sp) -; MIPS64R5EB-NEXT: lbu $1, 137($sp) -; MIPS64R5EB-NEXT: lbu $2, 136($sp) -; MIPS64R5EB-NEXT: move.v $w2, $w0 -; MIPS64R5EB-NEXT: insert.h $w2[0], $2 -; MIPS64R5EB-NEXT: insert.h $w2[1], $1 -; MIPS64R5EB-NEXT: lbu $1, 138($sp) -; MIPS64R5EB-NEXT: insert.h $w2[2], $1 -; MIPS64R5EB-NEXT: lbu $1, 139($sp) -; MIPS64R5EB-NEXT: insert.h $w2[3], $1 -; MIPS64R5EB-NEXT: lbu $1, 140($sp) -; MIPS64R5EB-NEXT: insert.h $w2[4], $1 -; MIPS64R5EB-NEXT: lbu $1, 141($sp) -; MIPS64R5EB-NEXT: insert.h $w2[5], $1 -; MIPS64R5EB-NEXT: lbu $1, 143($sp) -; MIPS64R5EB-NEXT: lbu $2, 142($sp) -; MIPS64R5EB-NEXT: insert.h $w2[6], $2 -; MIPS64R5EB-NEXT: insert.h $w2[7], $1 -; MIPS64R5EB-NEXT: copy_s.h $1, $w2[0] -; MIPS64R5EB-NEXT: copy_s.h $2, $w2[1] -; MIPS64R5EB-NEXT: sd $10, 120($sp) -; MIPS64R5EB-NEXT: lbu $3, 121($sp) -; MIPS64R5EB-NEXT: lbu $4, 120($sp) -; MIPS64R5EB-NEXT: move.v $w2, $w0 -; MIPS64R5EB-NEXT: insert.h $w2[0], $4 -; MIPS64R5EB-NEXT: insert.h $w2[1], $3 -; MIPS64R5EB-NEXT: lbu $3, 122($sp) -; MIPS64R5EB-NEXT: insert.h $w2[2], $3 -; MIPS64R5EB-NEXT: lbu $3, 123($sp) -; MIPS64R5EB-NEXT: insert.h $w2[3], $3 -; MIPS64R5EB-NEXT: lbu $3, 124($sp) -; MIPS64R5EB-NEXT: insert.h $w2[4], $3 -; MIPS64R5EB-NEXT: lbu $3, 125($sp) -; MIPS64R5EB-NEXT: insert.h $w2[5], $3 -; MIPS64R5EB-NEXT: lbu $3, 127($sp) -; MIPS64R5EB-NEXT: lbu $4, 126($sp) -; MIPS64R5EB-NEXT: insert.h $w2[6], $4 -; MIPS64R5EB-NEXT: insert.h $w2[7], $3 -; MIPS64R5EB-NEXT: copy_s.h $3, $w2[0] -; MIPS64R5EB-NEXT: copy_s.h $4, $w2[1] -; MIPS64R5EB-NEXT: sw $2, 76($sp) -; MIPS64R5EB-NEXT: sw $1, 68($sp) -; MIPS64R5EB-NEXT: ld.d $w2, 64($sp) -; MIPS64R5EB-NEXT: addv.d $w1, $w1, $w2 -; MIPS64R5EB-NEXT: sd $9, 128($sp) -; MIPS64R5EB-NEXT: lbu $1, 128($sp) -; MIPS64R5EB-NEXT: insert.h $w0[0], $1 -; 
MIPS64R5EB-NEXT: lbu $1, 129($sp) -; MIPS64R5EB-NEXT: insert.h $w0[1], $1 -; MIPS64R5EB-NEXT: lbu $1, 130($sp) -; MIPS64R5EB-NEXT: insert.h $w0[2], $1 -; MIPS64R5EB-NEXT: lbu $1, 131($sp) -; MIPS64R5EB-NEXT: insert.h $w0[3], $1 -; MIPS64R5EB-NEXT: lbu $1, 132($sp) -; MIPS64R5EB-NEXT: insert.h $w0[4], $1 -; MIPS64R5EB-NEXT: lbu $1, 133($sp) -; MIPS64R5EB-NEXT: insert.h $w0[5], $1 -; MIPS64R5EB-NEXT: lbu $1, 135($sp) -; MIPS64R5EB-NEXT: lbu $2, 134($sp) -; MIPS64R5EB-NEXT: insert.h $w0[6], $2 -; MIPS64R5EB-NEXT: insert.h $w0[7], $1 +; MIPS64R5EB-NEXT: daddiu $sp, $sp, -288 +; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 288 +; MIPS64R5EB-NEXT: sd $4, 280($sp) +; MIPS64R5EB-NEXT: lbu $1, 281($sp) +; MIPS64R5EB-NEXT: sh $1, 2($sp) +; MIPS64R5EB-NEXT: lbu $1, 280($sp) +; MIPS64R5EB-NEXT: sh $1, 0($sp) +; MIPS64R5EB-NEXT: ld.h $w0, 0($sp) ; MIPS64R5EB-NEXT: copy_s.h $1, $w0[0] ; MIPS64R5EB-NEXT: copy_s.h $2, $w0[1] -; MIPS64R5EB-NEXT: sw $2, 92($sp) -; MIPS64R5EB-NEXT: sw $1, 84($sp) -; MIPS64R5EB-NEXT: ld.d $w0, 80($sp) +; MIPS64R5EB-NEXT: sd $5, 272($sp) +; MIPS64R5EB-NEXT: lbu $3, 273($sp) +; MIPS64R5EB-NEXT: sh $3, 18($sp) +; MIPS64R5EB-NEXT: lbu $3, 272($sp) +; MIPS64R5EB-NEXT: sh $3, 16($sp) +; MIPS64R5EB-NEXT: ld.h $w0, 16($sp) +; MIPS64R5EB-NEXT: copy_s.h $3, $w0[0] +; MIPS64R5EB-NEXT: copy_s.h $4, $w0[1] +; MIPS64R5EB-NEXT: sw $4, 140($sp) +; MIPS64R5EB-NEXT: sw $3, 132($sp) +; MIPS64R5EB-NEXT: sw $2, 124($sp) +; MIPS64R5EB-NEXT: sw $1, 116($sp) +; MIPS64R5EB-NEXT: ld.d $w0, 128($sp) +; MIPS64R5EB-NEXT: ld.d $w1, 112($sp) ; MIPS64R5EB-NEXT: addv.d $w0, $w1, $w0 -; MIPS64R5EB-NEXT: sw $4, 108($sp) -; MIPS64R5EB-NEXT: sw $3, 100($sp) -; MIPS64R5EB-NEXT: ld.d $w1, 96($sp) +; MIPS64R5EB-NEXT: sd $6, 264($sp) +; MIPS64R5EB-NEXT: lbu $1, 265($sp) +; MIPS64R5EB-NEXT: sh $1, 34($sp) +; MIPS64R5EB-NEXT: lbu $1, 264($sp) +; MIPS64R5EB-NEXT: sh $1, 32($sp) +; MIPS64R5EB-NEXT: ld.h $w1, 32($sp) +; MIPS64R5EB-NEXT: copy_s.h $1, $w1[0] +; MIPS64R5EB-NEXT: copy_s.h $2, $w1[1] +; 
MIPS64R5EB-NEXT: sw $2, 156($sp) +; MIPS64R5EB-NEXT: sw $1, 148($sp) +; MIPS64R5EB-NEXT: ld.d $w1, 144($sp) +; MIPS64R5EB-NEXT: addv.d $w0, $w0, $w1 +; MIPS64R5EB-NEXT: sd $7, 256($sp) +; MIPS64R5EB-NEXT: lbu $1, 257($sp) +; MIPS64R5EB-NEXT: sh $1, 50($sp) +; MIPS64R5EB-NEXT: lbu $1, 256($sp) +; MIPS64R5EB-NEXT: sh $1, 48($sp) +; MIPS64R5EB-NEXT: ld.h $w1, 48($sp) +; MIPS64R5EB-NEXT: copy_s.h $1, $w1[0] +; MIPS64R5EB-NEXT: copy_s.h $2, $w1[1] +; MIPS64R5EB-NEXT: sw $2, 172($sp) +; MIPS64R5EB-NEXT: sw $1, 164($sp) +; MIPS64R5EB-NEXT: ld.d $w1, 160($sp) +; MIPS64R5EB-NEXT: addv.d $w0, $w0, $w1 +; MIPS64R5EB-NEXT: sd $8, 248($sp) +; MIPS64R5EB-NEXT: lbu $1, 249($sp) +; MIPS64R5EB-NEXT: sh $1, 66($sp) +; MIPS64R5EB-NEXT: lbu $1, 248($sp) +; MIPS64R5EB-NEXT: sh $1, 64($sp) +; MIPS64R5EB-NEXT: ld.h $w1, 64($sp) +; MIPS64R5EB-NEXT: copy_s.h $1, $w1[0] +; MIPS64R5EB-NEXT: copy_s.h $2, $w1[1] +; MIPS64R5EB-NEXT: sw $2, 188($sp) +; MIPS64R5EB-NEXT: sw $1, 180($sp) +; MIPS64R5EB-NEXT: ld.d $w1, 176($sp) +; MIPS64R5EB-NEXT: addv.d $w0, $w0, $w1 +; MIPS64R5EB-NEXT: sd $10, 232($sp) +; MIPS64R5EB-NEXT: lbu $1, 233($sp) +; MIPS64R5EB-NEXT: sh $1, 98($sp) +; MIPS64R5EB-NEXT: lbu $1, 232($sp) +; MIPS64R5EB-NEXT: sh $1, 96($sp) +; MIPS64R5EB-NEXT: ld.h $w1, 96($sp) +; MIPS64R5EB-NEXT: copy_s.h $1, $w1[0] +; MIPS64R5EB-NEXT: copy_s.h $2, $w1[1] +; MIPS64R5EB-NEXT: sd $9, 240($sp) +; MIPS64R5EB-NEXT: lbu $3, 241($sp) +; MIPS64R5EB-NEXT: sh $3, 82($sp) +; MIPS64R5EB-NEXT: lbu $3, 240($sp) +; MIPS64R5EB-NEXT: sh $3, 80($sp) +; MIPS64R5EB-NEXT: ld.h $w1, 80($sp) +; MIPS64R5EB-NEXT: copy_s.h $3, $w1[0] +; MIPS64R5EB-NEXT: copy_s.h $4, $w1[1] +; MIPS64R5EB-NEXT: sw $4, 204($sp) +; MIPS64R5EB-NEXT: sw $3, 196($sp) +; MIPS64R5EB-NEXT: ld.d $w1, 192($sp) +; MIPS64R5EB-NEXT: addv.d $w0, $w0, $w1 +; MIPS64R5EB-NEXT: sw $2, 220($sp) +; MIPS64R5EB-NEXT: sw $1, 212($sp) +; MIPS64R5EB-NEXT: ld.d $w1, 208($sp) ; MIPS64R5EB-NEXT: addv.d $w0, $w0, $w1 ; MIPS64R5EB-NEXT: copy_s.d $1, $w0[0] ; 
MIPS64R5EB-NEXT: copy_s.d $2, $w0[1] -; MIPS64R5EB-NEXT: sb $2, 117($sp) -; MIPS64R5EB-NEXT: sb $1, 116($sp) -; MIPS64R5EB-NEXT: lh $2, 116($sp) -; MIPS64R5EB-NEXT: daddiu $sp, $sp, 176 +; MIPS64R5EB-NEXT: sb $2, 229($sp) +; MIPS64R5EB-NEXT: sb $1, 228($sp) +; MIPS64R5EB-NEXT: lh $2, 228($sp) +; MIPS64R5EB-NEXT: daddiu $sp, $sp, 288 ; MIPS64R5EB-NEXT: jr $ra ; MIPS64R5EB-NEXT: nop ; @@ -773,181 +641,97 @@ define <2 x i8> @i8x2_7(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d, <2 x ; ; MIPS64R5EL-LABEL: i8x2_7: ; MIPS64R5EL: # %bb.0: # %entry -; MIPS64R5EL-NEXT: daddiu $sp, $sp, -176 -; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 176 -; MIPS64R5EL-NEXT: sd $4, 168($sp) -; MIPS64R5EL-NEXT: ldi.b $w0, 0 -; MIPS64R5EL-NEXT: lbu $1, 169($sp) -; MIPS64R5EL-NEXT: lbu $2, 168($sp) -; MIPS64R5EL-NEXT: move.v $w1, $w0 -; MIPS64R5EL-NEXT: insert.h $w1[0], $2 -; MIPS64R5EL-NEXT: insert.h $w1[1], $1 -; MIPS64R5EL-NEXT: lbu $1, 170($sp) -; MIPS64R5EL-NEXT: insert.h $w1[2], $1 -; MIPS64R5EL-NEXT: lbu $1, 171($sp) -; MIPS64R5EL-NEXT: insert.h $w1[3], $1 -; MIPS64R5EL-NEXT: lbu $1, 172($sp) -; MIPS64R5EL-NEXT: insert.h $w1[4], $1 -; MIPS64R5EL-NEXT: lbu $1, 173($sp) -; MIPS64R5EL-NEXT: insert.h $w1[5], $1 -; MIPS64R5EL-NEXT: lbu $1, 175($sp) -; MIPS64R5EL-NEXT: lbu $2, 174($sp) -; MIPS64R5EL-NEXT: insert.h $w1[6], $2 -; MIPS64R5EL-NEXT: insert.h $w1[7], $1 -; MIPS64R5EL-NEXT: copy_s.h $1, $w1[0] -; MIPS64R5EL-NEXT: copy_s.h $2, $w1[1] -; MIPS64R5EL-NEXT: sd $5, 160($sp) -; MIPS64R5EL-NEXT: lbu $3, 161($sp) -; MIPS64R5EL-NEXT: lbu $4, 160($sp) -; MIPS64R5EL-NEXT: move.v $w1, $w0 -; MIPS64R5EL-NEXT: insert.h $w1[0], $4 -; MIPS64R5EL-NEXT: insert.h $w1[1], $3 -; MIPS64R5EL-NEXT: lbu $3, 162($sp) -; MIPS64R5EL-NEXT: insert.h $w1[2], $3 -; MIPS64R5EL-NEXT: lbu $3, 163($sp) -; MIPS64R5EL-NEXT: insert.h $w1[3], $3 -; MIPS64R5EL-NEXT: lbu $3, 164($sp) -; MIPS64R5EL-NEXT: insert.h $w1[4], $3 -; MIPS64R5EL-NEXT: lbu $3, 165($sp) -; MIPS64R5EL-NEXT: insert.h $w1[5], $3 -; MIPS64R5EL-NEXT: lbu 
$3, 167($sp) -; MIPS64R5EL-NEXT: lbu $4, 166($sp) -; MIPS64R5EL-NEXT: insert.h $w1[6], $4 -; MIPS64R5EL-NEXT: insert.h $w1[7], $3 -; MIPS64R5EL-NEXT: copy_s.h $3, $w1[0] -; MIPS64R5EL-NEXT: copy_s.h $4, $w1[1] -; MIPS64R5EL-NEXT: sw $4, 24($sp) -; MIPS64R5EL-NEXT: sw $3, 16($sp) -; MIPS64R5EL-NEXT: sw $2, 8($sp) -; MIPS64R5EL-NEXT: sw $1, 0($sp) -; MIPS64R5EL-NEXT: ld.d $w1, 16($sp) -; MIPS64R5EL-NEXT: ld.d $w2, 0($sp) -; MIPS64R5EL-NEXT: addv.d $w1, $w2, $w1 -; MIPS64R5EL-NEXT: sd $6, 152($sp) -; MIPS64R5EL-NEXT: lbu $1, 153($sp) -; MIPS64R5EL-NEXT: lbu $2, 152($sp) -; MIPS64R5EL-NEXT: move.v $w2, $w0 -; MIPS64R5EL-NEXT: insert.h $w2[0], $2 -; MIPS64R5EL-NEXT: insert.h $w2[1], $1 -; MIPS64R5EL-NEXT: lbu $1, 154($sp) -; MIPS64R5EL-NEXT: insert.h $w2[2], $1 -; MIPS64R5EL-NEXT: lbu $1, 155($sp) -; MIPS64R5EL-NEXT: insert.h $w2[3], $1 -; MIPS64R5EL-NEXT: lbu $1, 156($sp) -; MIPS64R5EL-NEXT: insert.h $w2[4], $1 -; MIPS64R5EL-NEXT: lbu $1, 157($sp) -; MIPS64R5EL-NEXT: insert.h $w2[5], $1 -; MIPS64R5EL-NEXT: lbu $1, 159($sp) -; MIPS64R5EL-NEXT: lbu $2, 158($sp) -; MIPS64R5EL-NEXT: insert.h $w2[6], $2 -; MIPS64R5EL-NEXT: insert.h $w2[7], $1 -; MIPS64R5EL-NEXT: copy_s.h $1, $w2[0] -; MIPS64R5EL-NEXT: copy_s.h $2, $w2[1] -; MIPS64R5EL-NEXT: sw $2, 40($sp) -; MIPS64R5EL-NEXT: sw $1, 32($sp) -; MIPS64R5EL-NEXT: ld.d $w2, 32($sp) -; MIPS64R5EL-NEXT: addv.d $w1, $w1, $w2 -; MIPS64R5EL-NEXT: sd $7, 144($sp) -; MIPS64R5EL-NEXT: lbu $1, 145($sp) -; MIPS64R5EL-NEXT: lbu $2, 144($sp) -; MIPS64R5EL-NEXT: move.v $w2, $w0 -; MIPS64R5EL-NEXT: insert.h $w2[0], $2 -; MIPS64R5EL-NEXT: insert.h $w2[1], $1 -; MIPS64R5EL-NEXT: lbu $1, 146($sp) -; MIPS64R5EL-NEXT: insert.h $w2[2], $1 -; MIPS64R5EL-NEXT: lbu $1, 147($sp) -; MIPS64R5EL-NEXT: insert.h $w2[3], $1 -; MIPS64R5EL-NEXT: lbu $1, 148($sp) -; MIPS64R5EL-NEXT: insert.h $w2[4], $1 -; MIPS64R5EL-NEXT: lbu $1, 149($sp) -; MIPS64R5EL-NEXT: insert.h $w2[5], $1 -; MIPS64R5EL-NEXT: lbu $1, 151($sp) -; MIPS64R5EL-NEXT: lbu $2, 150($sp) -; 
MIPS64R5EL-NEXT: insert.h $w2[6], $2 -; MIPS64R5EL-NEXT: insert.h $w2[7], $1 -; MIPS64R5EL-NEXT: copy_s.h $1, $w2[0] -; MIPS64R5EL-NEXT: copy_s.h $2, $w2[1] -; MIPS64R5EL-NEXT: sw $2, 56($sp) -; MIPS64R5EL-NEXT: sw $1, 48($sp) -; MIPS64R5EL-NEXT: ld.d $w2, 48($sp) -; MIPS64R5EL-NEXT: addv.d $w1, $w1, $w2 -; MIPS64R5EL-NEXT: sd $8, 136($sp) -; MIPS64R5EL-NEXT: lbu $1, 137($sp) -; MIPS64R5EL-NEXT: lbu $2, 136($sp) -; MIPS64R5EL-NEXT: move.v $w2, $w0 -; MIPS64R5EL-NEXT: insert.h $w2[0], $2 -; MIPS64R5EL-NEXT: insert.h $w2[1], $1 -; MIPS64R5EL-NEXT: lbu $1, 138($sp) -; MIPS64R5EL-NEXT: insert.h $w2[2], $1 -; MIPS64R5EL-NEXT: lbu $1, 139($sp) -; MIPS64R5EL-NEXT: insert.h $w2[3], $1 -; MIPS64R5EL-NEXT: lbu $1, 140($sp) -; MIPS64R5EL-NEXT: insert.h $w2[4], $1 -; MIPS64R5EL-NEXT: lbu $1, 141($sp) -; MIPS64R5EL-NEXT: insert.h $w2[5], $1 -; MIPS64R5EL-NEXT: lbu $1, 143($sp) -; MIPS64R5EL-NEXT: lbu $2, 142($sp) -; MIPS64R5EL-NEXT: insert.h $w2[6], $2 -; MIPS64R5EL-NEXT: insert.h $w2[7], $1 -; MIPS64R5EL-NEXT: copy_s.h $1, $w2[0] -; MIPS64R5EL-NEXT: copy_s.h $2, $w2[1] -; MIPS64R5EL-NEXT: sd $10, 120($sp) -; MIPS64R5EL-NEXT: lbu $3, 121($sp) -; MIPS64R5EL-NEXT: lbu $4, 120($sp) -; MIPS64R5EL-NEXT: move.v $w2, $w0 -; MIPS64R5EL-NEXT: insert.h $w2[0], $4 -; MIPS64R5EL-NEXT: insert.h $w2[1], $3 -; MIPS64R5EL-NEXT: lbu $3, 122($sp) -; MIPS64R5EL-NEXT: insert.h $w2[2], $3 -; MIPS64R5EL-NEXT: lbu $3, 123($sp) -; MIPS64R5EL-NEXT: insert.h $w2[3], $3 -; MIPS64R5EL-NEXT: lbu $3, 124($sp) -; MIPS64R5EL-NEXT: insert.h $w2[4], $3 -; MIPS64R5EL-NEXT: lbu $3, 125($sp) -; MIPS64R5EL-NEXT: insert.h $w2[5], $3 -; MIPS64R5EL-NEXT: lbu $3, 127($sp) -; MIPS64R5EL-NEXT: lbu $4, 126($sp) -; MIPS64R5EL-NEXT: insert.h $w2[6], $4 -; MIPS64R5EL-NEXT: insert.h $w2[7], $3 -; MIPS64R5EL-NEXT: copy_s.h $3, $w2[0] -; MIPS64R5EL-NEXT: copy_s.h $4, $w2[1] -; MIPS64R5EL-NEXT: sw $2, 72($sp) -; MIPS64R5EL-NEXT: sw $1, 64($sp) -; MIPS64R5EL-NEXT: ld.d $w2, 64($sp) -; MIPS64R5EL-NEXT: addv.d $w1, $w1, $w2 -; 
MIPS64R5EL-NEXT: sd $9, 128($sp) -; MIPS64R5EL-NEXT: lbu $1, 128($sp) -; MIPS64R5EL-NEXT: insert.h $w0[0], $1 -; MIPS64R5EL-NEXT: lbu $1, 129($sp) -; MIPS64R5EL-NEXT: insert.h $w0[1], $1 -; MIPS64R5EL-NEXT: lbu $1, 130($sp) -; MIPS64R5EL-NEXT: insert.h $w0[2], $1 -; MIPS64R5EL-NEXT: lbu $1, 131($sp) -; MIPS64R5EL-NEXT: insert.h $w0[3], $1 -; MIPS64R5EL-NEXT: lbu $1, 132($sp) -; MIPS64R5EL-NEXT: insert.h $w0[4], $1 -; MIPS64R5EL-NEXT: lbu $1, 133($sp) -; MIPS64R5EL-NEXT: insert.h $w0[5], $1 -; MIPS64R5EL-NEXT: lbu $1, 135($sp) -; MIPS64R5EL-NEXT: lbu $2, 134($sp) -; MIPS64R5EL-NEXT: insert.h $w0[6], $2 -; MIPS64R5EL-NEXT: insert.h $w0[7], $1 +; MIPS64R5EL-NEXT: daddiu $sp, $sp, -288 +; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 288 +; MIPS64R5EL-NEXT: sd $4, 280($sp) +; MIPS64R5EL-NEXT: lbu $1, 281($sp) +; MIPS64R5EL-NEXT: sh $1, 2($sp) +; MIPS64R5EL-NEXT: lbu $1, 280($sp) +; MIPS64R5EL-NEXT: sh $1, 0($sp) +; MIPS64R5EL-NEXT: ld.h $w0, 0($sp) ; MIPS64R5EL-NEXT: copy_s.h $1, $w0[0] ; MIPS64R5EL-NEXT: copy_s.h $2, $w0[1] -; MIPS64R5EL-NEXT: sw $2, 88($sp) -; MIPS64R5EL-NEXT: sw $1, 80($sp) -; MIPS64R5EL-NEXT: ld.d $w0, 80($sp) +; MIPS64R5EL-NEXT: sd $5, 272($sp) +; MIPS64R5EL-NEXT: lbu $3, 273($sp) +; MIPS64R5EL-NEXT: sh $3, 18($sp) +; MIPS64R5EL-NEXT: lbu $3, 272($sp) +; MIPS64R5EL-NEXT: sh $3, 16($sp) +; MIPS64R5EL-NEXT: ld.h $w0, 16($sp) +; MIPS64R5EL-NEXT: copy_s.h $3, $w0[0] +; MIPS64R5EL-NEXT: copy_s.h $4, $w0[1] +; MIPS64R5EL-NEXT: sw $4, 136($sp) +; MIPS64R5EL-NEXT: sw $3, 128($sp) +; MIPS64R5EL-NEXT: sw $2, 120($sp) +; MIPS64R5EL-NEXT: sw $1, 112($sp) +; MIPS64R5EL-NEXT: ld.d $w0, 128($sp) +; MIPS64R5EL-NEXT: ld.d $w1, 112($sp) ; MIPS64R5EL-NEXT: addv.d $w0, $w1, $w0 -; MIPS64R5EL-NEXT: sw $4, 104($sp) -; MIPS64R5EL-NEXT: sw $3, 96($sp) -; MIPS64R5EL-NEXT: ld.d $w1, 96($sp) +; MIPS64R5EL-NEXT: sd $6, 264($sp) +; MIPS64R5EL-NEXT: lbu $1, 265($sp) +; MIPS64R5EL-NEXT: sh $1, 34($sp) +; MIPS64R5EL-NEXT: lbu $1, 264($sp) +; MIPS64R5EL-NEXT: sh $1, 32($sp) +; 
MIPS64R5EL-NEXT: ld.h $w1, 32($sp) +; MIPS64R5EL-NEXT: copy_s.h $1, $w1[0] +; MIPS64R5EL-NEXT: copy_s.h $2, $w1[1] +; MIPS64R5EL-NEXT: sw $2, 152($sp) +; MIPS64R5EL-NEXT: sw $1, 144($sp) +; MIPS64R5EL-NEXT: ld.d $w1, 144($sp) +; MIPS64R5EL-NEXT: addv.d $w0, $w0, $w1 +; MIPS64R5EL-NEXT: sd $7, 256($sp) +; MIPS64R5EL-NEXT: lbu $1, 257($sp) +; MIPS64R5EL-NEXT: sh $1, 50($sp) +; MIPS64R5EL-NEXT: lbu $1, 256($sp) +; MIPS64R5EL-NEXT: sh $1, 48($sp) +; MIPS64R5EL-NEXT: ld.h $w1, 48($sp) +; MIPS64R5EL-NEXT: copy_s.h $1, $w1[0] +; MIPS64R5EL-NEXT: copy_s.h $2, $w1[1] +; MIPS64R5EL-NEXT: sw $2, 168($sp) +; MIPS64R5EL-NEXT: sw $1, 160($sp) +; MIPS64R5EL-NEXT: ld.d $w1, 160($sp) +; MIPS64R5EL-NEXT: addv.d $w0, $w0, $w1 +; MIPS64R5EL-NEXT: sd $8, 248($sp) +; MIPS64R5EL-NEXT: lbu $1, 249($sp) +; MIPS64R5EL-NEXT: sh $1, 66($sp) +; MIPS64R5EL-NEXT: lbu $1, 248($sp) +; MIPS64R5EL-NEXT: sh $1, 64($sp) +; MIPS64R5EL-NEXT: ld.h $w1, 64($sp) +; MIPS64R5EL-NEXT: copy_s.h $1, $w1[0] +; MIPS64R5EL-NEXT: copy_s.h $2, $w1[1] +; MIPS64R5EL-NEXT: sw $2, 184($sp) +; MIPS64R5EL-NEXT: sw $1, 176($sp) +; MIPS64R5EL-NEXT: ld.d $w1, 176($sp) +; MIPS64R5EL-NEXT: addv.d $w0, $w0, $w1 +; MIPS64R5EL-NEXT: sd $10, 232($sp) +; MIPS64R5EL-NEXT: lbu $1, 233($sp) +; MIPS64R5EL-NEXT: sh $1, 98($sp) +; MIPS64R5EL-NEXT: lbu $1, 232($sp) +; MIPS64R5EL-NEXT: sh $1, 96($sp) +; MIPS64R5EL-NEXT: ld.h $w1, 96($sp) +; MIPS64R5EL-NEXT: copy_s.h $1, $w1[0] +; MIPS64R5EL-NEXT: copy_s.h $2, $w1[1] +; MIPS64R5EL-NEXT: sd $9, 240($sp) +; MIPS64R5EL-NEXT: lbu $3, 241($sp) +; MIPS64R5EL-NEXT: sh $3, 82($sp) +; MIPS64R5EL-NEXT: lbu $3, 240($sp) +; MIPS64R5EL-NEXT: sh $3, 80($sp) +; MIPS64R5EL-NEXT: ld.h $w1, 80($sp) +; MIPS64R5EL-NEXT: copy_s.h $3, $w1[0] +; MIPS64R5EL-NEXT: copy_s.h $4, $w1[1] +; MIPS64R5EL-NEXT: sw $4, 200($sp) +; MIPS64R5EL-NEXT: sw $3, 192($sp) +; MIPS64R5EL-NEXT: ld.d $w1, 192($sp) +; MIPS64R5EL-NEXT: addv.d $w0, $w0, $w1 +; MIPS64R5EL-NEXT: sw $2, 216($sp) +; MIPS64R5EL-NEXT: sw $1, 208($sp) +; 
MIPS64R5EL-NEXT: ld.d $w1, 208($sp) ; MIPS64R5EL-NEXT: addv.d $w0, $w0, $w1 ; MIPS64R5EL-NEXT: copy_s.d $1, $w0[0] ; MIPS64R5EL-NEXT: copy_s.d $2, $w0[1] -; MIPS64R5EL-NEXT: sb $2, 117($sp) -; MIPS64R5EL-NEXT: sb $1, 116($sp) -; MIPS64R5EL-NEXT: lh $2, 116($sp) -; MIPS64R5EL-NEXT: daddiu $sp, $sp, 176 +; MIPS64R5EL-NEXT: sb $2, 229($sp) +; MIPS64R5EL-NEXT: sb $1, 228($sp) +; MIPS64R5EL-NEXT: lh $2, 228($sp) +; MIPS64R5EL-NEXT: daddiu $sp, $sp, 288 ; MIPS64R5EL-NEXT: jr $ra ; MIPS64R5EL-NEXT: nop entry: @@ -3768,55 +3552,43 @@ define void @call_i8_2() { ; ; MIPS64R5EB-LABEL: call_i8_2: ; MIPS64R5EB: # %bb.0: # %entry -; MIPS64R5EB-NEXT: daddiu $sp, $sp, -48 -; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 48 -; MIPS64R5EB-NEXT: sd $ra, 40($sp) # 8-byte Folded Spill -; MIPS64R5EB-NEXT: sd $gp, 32($sp) # 8-byte Folded Spill +; MIPS64R5EB-NEXT: daddiu $sp, $sp, -64 +; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 64 +; MIPS64R5EB-NEXT: sd $ra, 56($sp) # 8-byte Folded Spill +; MIPS64R5EB-NEXT: sd $gp, 48($sp) # 8-byte Folded Spill ; MIPS64R5EB-NEXT: .cfi_offset 31, -8 ; MIPS64R5EB-NEXT: .cfi_offset 28, -16 ; MIPS64R5EB-NEXT: lui $1, %hi(%neg(%gp_rel(call_i8_2))) ; MIPS64R5EB-NEXT: daddu $1, $1, $25 ; MIPS64R5EB-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(call_i8_2))) ; MIPS64R5EB-NEXT: addiu $1, $zero, 1543 -; MIPS64R5EB-NEXT: sh $1, 24($sp) +; MIPS64R5EB-NEXT: sh $1, 40($sp) ; MIPS64R5EB-NEXT: addiu $1, $zero, 3080 -; MIPS64R5EB-NEXT: sh $1, 28($sp) +; MIPS64R5EB-NEXT: sh $1, 44($sp) ; MIPS64R5EB-NEXT: ld $25, %call16(i8_2)($gp) -; MIPS64R5EB-NEXT: lh $4, 24($sp) -; MIPS64R5EB-NEXT: lh $5, 28($sp) +; MIPS64R5EB-NEXT: lh $4, 40($sp) +; MIPS64R5EB-NEXT: lh $5, 44($sp) ; MIPS64R5EB-NEXT: jalr $25 ; MIPS64R5EB-NEXT: nop -; MIPS64R5EB-NEXT: sd $2, 16($sp) -; MIPS64R5EB-NEXT: ldi.b $w0, 0 -; MIPS64R5EB-NEXT: lbu $1, 16($sp) -; MIPS64R5EB-NEXT: insert.h $w0[0], $1 -; MIPS64R5EB-NEXT: lbu $1, 17($sp) -; MIPS64R5EB-NEXT: insert.h $w0[1], $1 -; MIPS64R5EB-NEXT: lbu $1, 18($sp) -; MIPS64R5EB-NEXT: 
insert.h $w0[2], $1 -; MIPS64R5EB-NEXT: lbu $1, 19($sp) -; MIPS64R5EB-NEXT: insert.h $w0[3], $1 -; MIPS64R5EB-NEXT: lbu $1, 20($sp) -; MIPS64R5EB-NEXT: insert.h $w0[4], $1 -; MIPS64R5EB-NEXT: lbu $1, 21($sp) -; MIPS64R5EB-NEXT: insert.h $w0[5], $1 -; MIPS64R5EB-NEXT: lbu $1, 23($sp) -; MIPS64R5EB-NEXT: lbu $2, 22($sp) -; MIPS64R5EB-NEXT: insert.h $w0[6], $2 -; MIPS64R5EB-NEXT: insert.h $w0[7], $1 +; MIPS64R5EB-NEXT: sd $2, 32($sp) +; MIPS64R5EB-NEXT: lbu $1, 33($sp) +; MIPS64R5EB-NEXT: sh $1, 2($sp) +; MIPS64R5EB-NEXT: lbu $1, 32($sp) +; MIPS64R5EB-NEXT: sh $1, 0($sp) +; MIPS64R5EB-NEXT: ld.h $w0, 0($sp) ; MIPS64R5EB-NEXT: copy_s.h $1, $w0[0] ; MIPS64R5EB-NEXT: copy_s.h $2, $w0[1] -; MIPS64R5EB-NEXT: sw $2, 12($sp) -; MIPS64R5EB-NEXT: sw $1, 4($sp) -; MIPS64R5EB-NEXT: ld.d $w0, 0($sp) +; MIPS64R5EB-NEXT: sw $2, 28($sp) +; MIPS64R5EB-NEXT: sw $1, 20($sp) +; MIPS64R5EB-NEXT: ld.d $w0, 16($sp) ; MIPS64R5EB-NEXT: copy_s.d $1, $w0[0] ; MIPS64R5EB-NEXT: copy_s.d $2, $w0[1] ; MIPS64R5EB-NEXT: ld $3, %got_disp(gv2i8)($gp) ; MIPS64R5EB-NEXT: sb $2, 1($3) ; MIPS64R5EB-NEXT: sb $1, 0($3) -; MIPS64R5EB-NEXT: ld $gp, 32($sp) # 8-byte Folded Reload -; MIPS64R5EB-NEXT: ld $ra, 40($sp) # 8-byte Folded Reload -; MIPS64R5EB-NEXT: daddiu $sp, $sp, 48 +; MIPS64R5EB-NEXT: ld $gp, 48($sp) # 8-byte Folded Reload +; MIPS64R5EB-NEXT: ld $ra, 56($sp) # 8-byte Folded Reload +; MIPS64R5EB-NEXT: daddiu $sp, $sp, 64 ; MIPS64R5EB-NEXT: jr $ra ; MIPS64R5EB-NEXT: nop ; @@ -3892,55 +3664,43 @@ define void @call_i8_2() { ; ; MIPS64R5EL-LABEL: call_i8_2: ; MIPS64R5EL: # %bb.0: # %entry -; MIPS64R5EL-NEXT: daddiu $sp, $sp, -48 -; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 48 -; MIPS64R5EL-NEXT: sd $ra, 40($sp) # 8-byte Folded Spill -; MIPS64R5EL-NEXT: sd $gp, 32($sp) # 8-byte Folded Spill +; MIPS64R5EL-NEXT: daddiu $sp, $sp, -64 +; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 64 +; MIPS64R5EL-NEXT: sd $ra, 56($sp) # 8-byte Folded Spill +; MIPS64R5EL-NEXT: sd $gp, 48($sp) # 8-byte Folded Spill ; MIPS64R5EL-NEXT: 
.cfi_offset 31, -8 ; MIPS64R5EL-NEXT: .cfi_offset 28, -16 ; MIPS64R5EL-NEXT: lui $1, %hi(%neg(%gp_rel(call_i8_2))) ; MIPS64R5EL-NEXT: daddu $1, $1, $25 ; MIPS64R5EL-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(call_i8_2))) ; MIPS64R5EL-NEXT: addiu $1, $zero, 1798 -; MIPS64R5EL-NEXT: sh $1, 24($sp) +; MIPS64R5EL-NEXT: sh $1, 40($sp) ; MIPS64R5EL-NEXT: addiu $1, $zero, 2060 -; MIPS64R5EL-NEXT: sh $1, 28($sp) +; MIPS64R5EL-NEXT: sh $1, 44($sp) ; MIPS64R5EL-NEXT: ld $25, %call16(i8_2)($gp) -; MIPS64R5EL-NEXT: lh $4, 24($sp) -; MIPS64R5EL-NEXT: lh $5, 28($sp) +; MIPS64R5EL-NEXT: lh $4, 40($sp) +; MIPS64R5EL-NEXT: lh $5, 44($sp) ; MIPS64R5EL-NEXT: jalr $25 ; MIPS64R5EL-NEXT: nop -; MIPS64R5EL-NEXT: sd $2, 16($sp) -; MIPS64R5EL-NEXT: ldi.b $w0, 0 -; MIPS64R5EL-NEXT: lbu $1, 16($sp) -; MIPS64R5EL-NEXT: insert.h $w0[0], $1 -; MIPS64R5EL-NEXT: lbu $1, 17($sp) -; MIPS64R5EL-NEXT: insert.h $w0[1], $1 -; MIPS64R5EL-NEXT: lbu $1, 18($sp) -; MIPS64R5EL-NEXT: insert.h $w0[2], $1 -; MIPS64R5EL-NEXT: lbu $1, 19($sp) -; MIPS64R5EL-NEXT: insert.h $w0[3], $1 -; MIPS64R5EL-NEXT: lbu $1, 20($sp) -; MIPS64R5EL-NEXT: insert.h $w0[4], $1 -; MIPS64R5EL-NEXT: lbu $1, 21($sp) -; MIPS64R5EL-NEXT: insert.h $w0[5], $1 -; MIPS64R5EL-NEXT: lbu $1, 23($sp) -; MIPS64R5EL-NEXT: lbu $2, 22($sp) -; MIPS64R5EL-NEXT: insert.h $w0[6], $2 -; MIPS64R5EL-NEXT: insert.h $w0[7], $1 +; MIPS64R5EL-NEXT: sd $2, 32($sp) +; MIPS64R5EL-NEXT: lbu $1, 33($sp) +; MIPS64R5EL-NEXT: sh $1, 2($sp) +; MIPS64R5EL-NEXT: lbu $1, 32($sp) +; MIPS64R5EL-NEXT: sh $1, 0($sp) +; MIPS64R5EL-NEXT: ld.h $w0, 0($sp) ; MIPS64R5EL-NEXT: copy_s.h $1, $w0[0] ; MIPS64R5EL-NEXT: copy_s.h $2, $w0[1] -; MIPS64R5EL-NEXT: sw $2, 8($sp) -; MIPS64R5EL-NEXT: sw $1, 0($sp) -; MIPS64R5EL-NEXT: ld.d $w0, 0($sp) +; MIPS64R5EL-NEXT: sw $2, 24($sp) +; MIPS64R5EL-NEXT: sw $1, 16($sp) +; MIPS64R5EL-NEXT: ld.d $w0, 16($sp) ; MIPS64R5EL-NEXT: copy_s.d $1, $w0[0] ; MIPS64R5EL-NEXT: copy_s.d $2, $w0[1] ; MIPS64R5EL-NEXT: ld $3, %got_disp(gv2i8)($gp) ; MIPS64R5EL-NEXT: 
sb $2, 1($3) ; MIPS64R5EL-NEXT: sb $1, 0($3) -; MIPS64R5EL-NEXT: ld $gp, 32($sp) # 8-byte Folded Reload -; MIPS64R5EL-NEXT: ld $ra, 40($sp) # 8-byte Folded Reload -; MIPS64R5EL-NEXT: daddiu $sp, $sp, 48 +; MIPS64R5EL-NEXT: ld $gp, 48($sp) # 8-byte Folded Reload +; MIPS64R5EL-NEXT: ld $ra, 56($sp) # 8-byte Folded Reload +; MIPS64R5EL-NEXT: daddiu $sp, $sp, 64 ; MIPS64R5EL-NEXT: jr $ra ; MIPS64R5EL-NEXT: nop entry: diff --git a/llvm/test/CodeGen/X86/dagcombine-cse.ll b/llvm/test/CodeGen/X86/dagcombine-cse.ll index 7de383087515..263ce1e4d347 100644 --- a/llvm/test/CodeGen/X86/dagcombine-cse.ll +++ b/llvm/test/CodeGen/X86/dagcombine-cse.ll @@ -31,7 +31,6 @@ define i32 @t(i8* %ref_frame_ptr, i32 %ref_frame_stride, i32 %idxX, i32 %idxY) n ; X64-NEXT: shlq $32, %rcx ; X64-NEXT: orq %rax, %rcx ; X64-NEXT: movq %rcx, %xmm0 -; X64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero ; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,2,4,5,6,7] ; X64-NEXT: movd %xmm0, %eax ; X64-NEXT: retq diff --git a/llvm/test/CodeGen/X86/extractelement-load.ll b/llvm/test/CodeGen/X86/extractelement-load.ll index 8cde110383b3..f6249c69cff4 100644 --- a/llvm/test/CodeGen/X86/extractelement-load.ll +++ b/llvm/test/CodeGen/X86/extractelement-load.ll @@ -85,8 +85,7 @@ define i64 @t4(<2 x double>* %a) { ; X32-SSE2-LABEL: t4: ; X32-SSE2: # %bb.0: ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-SSE2-NEXT: movapd (%eax), %xmm0 -; X32-SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0] +; X32-SSE2-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] ; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] ; X32-SSE2-NEXT: movd %xmm1, %eax ; X32-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] diff --git a/llvm/test/CodeGen/X86/known-bits-vector.ll b/llvm/test/CodeGen/X86/known-bits-vector.ll index b4d3daafa1fd..3da72f0b2f61 100644 --- a/llvm/test/CodeGen/X86/known-bits-vector.ll +++ b/llvm/test/CodeGen/X86/known-bits-vector.ll @@ -24,10 +24,9 @@ define float @knownbits_mask_extract_uitofp(<2 x i64> %a0) nounwind { ; X32-LABEL: 
knownbits_mask_extract_uitofp: ; X32: # %bb.0: ; X32-NEXT: pushl %eax -; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4,5,6,7] +; X32-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero ; X32-NEXT: vmovd %xmm0, %eax -; X32-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0 +; X32-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm0 ; X32-NEXT: vmovss %xmm0, (%esp) ; X32-NEXT: flds (%esp) ; X32-NEXT: popl %eax @@ -35,10 +34,9 @@ define float @knownbits_mask_extract_uitofp(<2 x i64> %a0) nounwind { ; ; X64-LABEL: knownbits_mask_extract_uitofp: ; X64: # %bb.0: -; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4,5,6,7] +; X64-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero ; X64-NEXT: vmovq %xmm0, %rax -; X64-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0 +; X64-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm0 ; X64-NEXT: retq %1 = and <2 x i64> %a0, %2 = extractelement <2 x i64> %1, i32 0 diff --git a/llvm/test/CodeGen/X86/oddshuffles.ll b/llvm/test/CodeGen/X86/oddshuffles.ll index c4dd8012dd7d..fb360a3af9c0 100644 --- a/llvm/test/CodeGen/X86/oddshuffles.ll +++ b/llvm/test/CodeGen/X86/oddshuffles.ll @@ -68,41 +68,29 @@ define void @v3f64(<2 x double> %a, <2 x double> %b, <3 x double>* %p) nounwind define void @v3i32(<2 x i32> %a, <2 x i32> %b, <3 x i32>* %p) nounwind { ; SSE2-LABEL: v3i32: ; SSE2: # %bb.0: -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3] -; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] -; SSE2-NEXT: movd %xmm0, 8(%rdi) -; SSE2-NEXT: movq %xmm2, (%rdi) +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: movd %xmm2, 8(%rdi) +; SSE2-NEXT: movq %xmm0, (%rdi) ; SSE2-NEXT: retq ; ; SSE42-LABEL: v3i32: ; SSE42: # %bb.0: -; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] -; SSE42-NEXT: pblendw 
{{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7] -; SSE42-NEXT: pextrd $2, %xmm0, 8(%rdi) -; SSE42-NEXT: movq %xmm1, (%rdi) +; SSE42-NEXT: extractps $2, %xmm0, 8(%rdi) +; SSE42-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE42-NEXT: movlps %xmm0, (%rdi) ; SSE42-NEXT: retq ; -; AVX1-LABEL: v3i32: -; AVX1: # %bb.0: -; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,1,1] -; AVX1-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2,3] -; AVX1-NEXT: vextractps $2, %xmm0, 8(%rdi) -; AVX1-NEXT: vmovlps %xmm1, (%rdi) -; AVX1-NEXT: retq -; -; AVX2-LABEL: v3i32: -; AVX2: # %bb.0: -; AVX2-NEXT: vbroadcastss %xmm1, %xmm1 -; AVX2-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2,3] -; AVX2-NEXT: vextractps $2, %xmm0, 8(%rdi) -; AVX2-NEXT: vmovlps %xmm1, (%rdi) -; AVX2-NEXT: retq +; AVX-LABEL: v3i32: +; AVX: # %bb.0: +; AVX-NEXT: vunpcklps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX-NEXT: vextractps $2, %xmm0, 8(%rdi) +; AVX-NEXT: vmovlps %xmm1, (%rdi) +; AVX-NEXT: retq ; ; XOP-LABEL: v3i32: ; XOP: # %bb.0: -; XOP-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,1,1] -; XOP-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2,3] +; XOP-NEXT: vunpcklps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; XOP-NEXT: vextractps $2, %xmm0, 8(%rdi) ; XOP-NEXT: vmovlps %xmm1, (%rdi) ; XOP-NEXT: retq @@ -114,10 +102,9 @@ define void @v3i32(<2 x i32> %a, <2 x i32> %b, <3 x i32>* %p) nounwind { define void @v5i16(<4 x i16> %a, <4 x i16> %b, <5 x i16>* %p) nounwind { ; SSE2-LABEL: v5i16: ; SSE2: # %bb.0: +; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7] ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,2,3] ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[2,0,2,3,4,5,6,7] -; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,3,2,3] ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] ; SSE2-NEXT: pextrw $6, %xmm0, %eax ; SSE2-NEXT: movw %ax, 8(%rdi) @@ -126,10 +113,9 
@@ define void @v5i16(<4 x i16> %a, <4 x i16> %b, <5 x i16>* %p) nounwind { ; ; SSE42-LABEL: v5i16: ; SSE42: # %bb.0: +; SSE42-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7] ; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,2,3] ; SSE42-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[2,0,2,3,4,5,6,7] -; SSE42-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7] -; SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,3,2,3] ; SSE42-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] ; SSE42-NEXT: pextrw $6, %xmm0, 8(%rdi) ; SSE42-NEXT: movq %xmm2, (%rdi) @@ -137,10 +123,9 @@ define void @v5i16(<4 x i16> %a, <4 x i16> %b, <5 x i16>* %p) nounwind { ; ; AVX1-LABEL: v5i16: ; AVX1: # %bb.0: +; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7] ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,1,2,3] ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[2,0,2,3,4,5,6,7] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7] -; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,3,2,3] ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] ; AVX1-NEXT: vpextrw $6, %xmm0, 8(%rdi) ; AVX1-NEXT: vmovq %xmm1, (%rdi) @@ -148,10 +133,9 @@ define void @v5i16(<4 x i16> %a, <4 x i16> %b, <5 x i16>* %p) nounwind { ; ; AVX2-SLOW-LABEL: v5i16: ; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7] ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,1,2,3] ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[2,0,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,3,2,3] ; AVX2-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] ; AVX2-SLOW-NEXT: vpextrw $6, %xmm0, 8(%rdi) ; AVX2-SLOW-NEXT: vmovq %xmm1, (%rdi) @@ -160,7 +144,7 @@ define void @v5i16(<4 x i16> %a, <4 x i16> %b, <5 x i16>* %p) nounwind { ; AVX2-FAST-LABEL: v5i16: ; AVX2-FAST: # %bb.0: ; AVX2-FAST-NEXT: vpshufb {{.*#+}} 
xmm1 = xmm1[4,5,8,9,4,5,6,7,8,9,10,11,12,13,14,15] -; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[0,1,4,5,12,13,14,15,8,9,10,11,12,13,14,15] +; AVX2-FAST-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7] ; AVX2-FAST-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] ; AVX2-FAST-NEXT: vpextrw $6, %xmm0, 8(%rdi) ; AVX2-FAST-NEXT: vmovq %xmm1, (%rdi) @@ -168,7 +152,7 @@ define void @v5i16(<4 x i16> %a, <4 x i16> %b, <5 x i16>* %p) nounwind { ; ; XOP-LABEL: v5i16: ; XOP: # %bb.0: -; XOP-NEXT: vpperm {{.*#+}} xmm1 = xmm0[0,1],xmm1[4,5],xmm0[4,5],xmm1[8,9],xmm0[12,13],xmm1[4,5],xmm0[14,15],xmm1[6,7] +; XOP-NEXT: vpperm {{.*#+}} xmm1 = xmm0[0,1],xmm1[4,5],xmm0[4,5],xmm1[8,9],xmm0[4,5],xmm1[4,5],xmm0[6,7],xmm1[6,7] ; XOP-NEXT: vpextrw $6, %xmm0, 8(%rdi) ; XOP-NEXT: vmovq %xmm1, (%rdi) ; XOP-NEXT: retq @@ -377,23 +361,24 @@ define void @v7i32(<4 x i32> %a, <4 x i32> %b, <7 x i32>* %p) nounwind { ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,1,2,2] ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,1,0,3] ; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm2[2],xmm3[3],xmm2[3] -; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[3,0] -; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,3] +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,1,2,3] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; SSE2-NEXT: movd %xmm1, 24(%rdi) -; SSE2-NEXT: movlps %xmm0, 16(%rdi) +; SSE2-NEXT: movq %xmm0, 16(%rdi) ; SSE2-NEXT: movdqa %xmm3, (%rdi) ; SSE2-NEXT: retq ; ; SSE42-LABEL: v7i32: ; SSE42: # %bb.0: -; SSE42-NEXT: movdqa %xmm1, %xmm2 -; SSE42-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm0[2,3],xmm2[4,5,6,7] -; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7] -; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,3,2] -; SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,0,3] +; SSE42-NEXT: movdqa %xmm0, %xmm2 +; SSE42-NEXT: pblendw {{.*#+}} xmm2 = 
xmm2[0,1,2,3],xmm1[4,5],xmm2[6,7] +; SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,3,2] +; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] +; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3] ; SSE42-NEXT: movd %xmm1, 24(%rdi) -; SSE42-NEXT: movq %xmm2, 16(%rdi) -; SSE42-NEXT: movdqa %xmm0, (%rdi) +; SSE42-NEXT: movq %xmm0, 16(%rdi) +; SSE42-NEXT: movdqa %xmm2, (%rdi) ; SSE42-NEXT: retq ; ; AVX1-LABEL: v7i32: diff --git a/llvm/test/CodeGen/X86/scalar_widen_div.ll b/llvm/test/CodeGen/X86/scalar_widen_div.ll index eb6670b709c3..3df758f018d5 100644 --- a/llvm/test/CodeGen/X86/scalar_widen_div.ll +++ b/llvm/test/CodeGen/X86/scalar_widen_div.ll @@ -427,7 +427,6 @@ define void @test_int_div(<3 x i32>* %dest, <3 x i32>* %old, i32 %n) { ; CHECK-NEXT: pextrd $2, %xmm1, %r8d ; CHECK-NEXT: cltd ; CHECK-NEXT: idivl %r8d -; CHECK-NEXT: pinsrd $2, %eax, %xmm2 ; CHECK-NEXT: movl %eax, 8(%rdi,%rcx) ; CHECK-NEXT: movq %xmm2, (%rdi,%rcx) ; CHECK-NEXT: addq $16, %rcx diff --git a/llvm/test/CodeGen/X86/vec_shift7.ll b/llvm/test/CodeGen/X86/vec_shift7.ll index 1624ae7346ce..2dfad54df566 100644 --- a/llvm/test/CodeGen/X86/vec_shift7.ll +++ b/llvm/test/CodeGen/X86/vec_shift7.ll @@ -7,12 +7,9 @@ define i64 @test1(<2 x i64> %a) { ; X32-LABEL: test1: ; X32: # %bb.0: # %entry -; X32-NEXT: movdqa %xmm0, %xmm1 -; X32-NEXT: psllq $2, %xmm1 -; X32-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] -; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,2,3] -; X32-NEXT: movd %xmm1, %edx ; X32-NEXT: movd %xmm0, %eax +; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] +; X32-NEXT: movd %xmm0, %edx ; X32-NEXT: retl ; ; X64-LABEL: test1: