diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index d7698a5ec962..28bd80610c84 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -125,6 +125,7 @@ cl::desc("use absolute jump tables on ppc"), cl::Hidden); STATISTIC(NumTailCalls, "Number of tail calls"); STATISTIC(NumSiblingCalls, "Number of sibling calls"); +STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM"); static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int); @@ -1505,6 +1506,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ"; case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP"; case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP"; + case PPCISD::SCALAR_TO_VECTOR_PERMUTED: + return "PPCISD::SCALAR_TO_VECTOR_PERMUTED"; case PPCISD::ANDI_rec_1_EQ_BIT: return "PPCISD::ANDI_rec_1_EQ_BIT"; case PPCISD::ANDI_rec_1_GT_BIT: @@ -2716,7 +2719,8 @@ static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) { for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end(); UI != UE; ++UI) if (UI.getUse().get().getResNo() == 0 && - UI->getOpcode() != ISD::SCALAR_TO_VECTOR) + UI->getOpcode() != ISD::SCALAR_TO_VECTOR && + UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED) return false; return true; @@ -9041,7 +9045,8 @@ static const SDValue *getNormalLoadInput(const SDValue &Op) { const SDValue *InputLoad = &Op; if (InputLoad->getOpcode() == ISD::BITCAST) InputLoad = &InputLoad->getOperand(0); - if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR) + if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR || + InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) InputLoad = &InputLoad->getOperand(0); if (InputLoad->getOpcode() != ISD::LOAD) return nullptr; @@ -9690,6 +9695,15 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SDValue V1 = 
Op.getOperand(0); SDValue V2 = Op.getOperand(1); ShuffleVectorSDNode *SVOp = cast(Op); + + // Any nodes that were combined in the target-independent combiner prior + // to vector legalization will not be sent to the target combine. Try to + // combine it here. + if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) { + DAG.ReplaceAllUsesOfValueWith(Op, NewShuffle); + Op = NewShuffle; + SVOp = cast(Op); + } EVT VT = Op.getValueType(); bool isLittleEndian = Subtarget.isLittleEndian(); @@ -9715,6 +9729,11 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4; else Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8; + + // If we are loading a partial vector, it does not make sense to adjust + // the base pointer. This happens with (splat (s_to_v_permuted (ld))). + if (LD->getMemoryVT().getSizeInBits() == (IsFourByte ? 32 : 64)) + Offset = 0; SDValue BasePtr = LD->getBasePtr(); if (Offset != 0) BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), @@ -9988,7 +10007,13 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, MVT::i32)); } + ShufflesHandledWithVPERM++; SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask); + LLVM_DEBUG(dbgs() << "Emitting a VPERM for the following shuffle:\n"); + LLVM_DEBUG(SVOp->dump()); + LLVM_DEBUG(dbgs() << "With the following permute control vector:\n"); + LLVM_DEBUG(VPermMask.dump()); + if (isLittleEndian) return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V2, V1, VPermMask); @@ -14114,6 +14139,199 @@ SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N, return Val; } +static bool isAlternatingShuffMask(const ArrayRef &Mask, int NumElts) { + // Check that the source of the element keeps flipping + // (i.e. Mask[i] < NumElts -> Mask[i+1] >= NumElts). 
+ bool PrevElemFromFirstVec = Mask[0] < NumElts; + for (int i = 1, e = Mask.size(); i < e; i++) { + if (PrevElemFromFirstVec && Mask[i] < NumElts) + return false; + if (!PrevElemFromFirstVec && Mask[i] >= NumElts) + return false; + PrevElemFromFirstVec = !PrevElemFromFirstVec; + } + return true; +} + +static bool isSplatBV(SDValue Op) { + if (Op.getOpcode() != ISD::BUILD_VECTOR) + return false; + SDValue FirstOp; + + // Find first non-undef input. + for (int i = 0, e = Op.getNumOperands(); i < e; i++) { + FirstOp = Op.getOperand(i); + if (!FirstOp.isUndef()) + break; + } + + // All inputs are undef or the same as the first non-undef input. + for (int i = 1, e = Op.getNumOperands(); i < e; i++) + if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef()) + return false; + return true; +} + +static SDValue isScalarToVec(SDValue Op) { + if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR) + return Op; + if (Op.getOpcode() != ISD::BITCAST) + return SDValue(); + Op = Op.getOperand(0); + if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR) + return Op; + return SDValue(); +} + +static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl &ShuffV, + int LHSMaxIdx, int RHSMinIdx, + int RHSMaxIdx, int HalfVec) { + for (int i = 0, e = ShuffV.size(); i < e; i++) { + int Idx = ShuffV[i]; + if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx)) + ShuffV[i] += HalfVec; + } + return; +} + +// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if +// the original is: +// ( (scalar_to_vector (Ty (extract_elt %a, C)))) +// In such a case, just change the shuffle mask to extract the element +// from the permuted index. 
+static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG) { + SDLoc dl(OrigSToV); + EVT VT = OrigSToV.getValueType(); + assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR && + "Expecting a SCALAR_TO_VECTOR here"); + SDValue Input = OrigSToV.getOperand(0); + + if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { + ConstantSDNode *Idx = dyn_cast(Input.getOperand(1)); + SDValue OrigVector = Input.getOperand(0); + + // Can't handle non-const element indices or different vector types + // for the input to the extract and the output of the scalar_to_vector. + if (Idx && VT == OrigVector.getValueType()) { + SmallVector NewMask(VT.getVectorNumElements(), -1); + NewMask[VT.getVectorNumElements() / 2] = Idx->getZExtValue(); + return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask); + } + } + return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT, + OrigSToV.getOperand(0)); +} + +// On little endian subtargets, combine shuffles such as: +// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, , %b +// into: +// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, , %b +// because the latter can be matched to a single instruction merge. +// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute +// to put the value into element zero. Adjust the shuffle mask so that the +// vector can remain in permuted form (to prevent a swap prior to a shuffle). +SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN, + SelectionDAG &DAG) const { + SDValue LHS = SVN->getOperand(0); + SDValue RHS = SVN->getOperand(1); + auto Mask = SVN->getMask(); + int NumElts = LHS.getValueType().getVectorNumElements(); + SDValue Res(SVN, 0); + SDLoc dl(SVN); + + // None of these combines are useful on big endian systems since the ISA + // already has a big endian bias. 
+ if (!Subtarget.isLittleEndian()) + return Res; + + // If this is not a shuffle of a shuffle and the first element comes from + // the second vector, canonicalize to the commuted form. This will make it + // more likely to match one of the single instruction patterns. + if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE && + RHS.getOpcode() != ISD::VECTOR_SHUFFLE) { + std::swap(LHS, RHS); + Res = DAG.getCommutedVectorShuffle(*SVN); + Mask = cast(Res)->getMask(); + } + + // Adjust the shuffle mask if either input vector comes from a + // SCALAR_TO_VECTOR and keep the respective input vector in permuted + // form (to prevent the need for a swap). + SmallVector ShuffV(Mask.begin(), Mask.end()); + SDValue SToVLHS = isScalarToVec(LHS); + SDValue SToVRHS = isScalarToVec(RHS); + if (SToVLHS || SToVRHS) { + int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements() + : SToVRHS.getValueType().getVectorNumElements(); + int NumEltsOut = ShuffV.size(); + + // Initially assume that neither input is permuted. These will be adjusted + // accordingly if either input is. + int LHSMaxIdx = -1; + int RHSMinIdx = -1; + int RHSMaxIdx = -1; + int HalfVec = LHS.getValueType().getVectorNumElements() / 2; + + // Get the permuted scalar to vector nodes for the source(s) that come from + // ISD::SCALAR_TO_VECTOR. + if (SToVLHS) { + // Set up the values for the shuffle vector fixup. + LHSMaxIdx = NumEltsOut / NumEltsIn; + SToVLHS = getSToVPermuted(SToVLHS, DAG); + if (SToVLHS.getValueType() != LHS.getValueType()) + SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS); + LHS = SToVLHS; + } + if (SToVRHS) { + RHSMinIdx = NumEltsOut; + RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx; + SToVRHS = getSToVPermuted(SToVRHS, DAG); + if (SToVRHS.getValueType() != RHS.getValueType()) + SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS); + RHS = SToVRHS; + } + + // Fix up the shuffle mask to reflect where the desired element actually is. 
+ // The minimum and maximum indices that correspond to element zero for both + // the LHS and RHS are computed and will control which shuffle mask entries + // are to be changed. For example, if the RHS is permuted, any shuffle mask + // entries in the range [RHSMinIdx,RHSMaxIdx) will be incremented by + // HalfVec to refer to the corresponding element in the permuted vector. + fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx, + HalfVec); + Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV); + + // We may have simplified away the shuffle. We won't be able to do anything + // further with it here. + if (!isa(Res)) + return Res; + Mask = cast(Res)->getMask(); + } + + // The common case after we commuted the shuffle is that the RHS is a splat + // and we have elements coming in from the splat at indices that are not + // conducive to using a merge. + // Example: + // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, + if (!isSplatBV(RHS)) + return Res; + + // We are looking for a mask such that all even elements are from + // one vector and all odd elements from the other. + if (!isAlternatingShuffMask(Mask, NumElts)) + return Res; + + // Adjust the mask so we are pulling in the same index from the splat + // as the index from the interesting vector in consecutive elements. 
+ // Example: + // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, + for (int i = 1, e = Mask.size(); i < e; i += 2) + ShuffV[i] = (ShuffV[i - 1] + NumElts); + + Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV); + return Res; +} + SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN, LSBaseSDNode *LSBase, DAGCombinerInfo &DCI) const { @@ -14223,7 +14441,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, LSBaseSDNode* LSBase = cast(N->getOperand(0)); return combineVReverseMemOP(cast(N), LSBase, DCI); } - break; + return combineVectorShuffle(cast(N), DCI.DAG); case ISD::STORE: { EVT Op1VT = N->getOperand(1).getValueType(); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 77252e919553..9f7c6ab53a17 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -221,6 +221,14 @@ namespace llvm { /// As with SINT_VEC_TO_FP, used for converting illegal types. UINT_VEC_TO_FP, + /// PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to + /// place the value into the least significant element of the most + /// significant doubleword in the vector. This is not element zero for + /// anything smaller than a doubleword on either endianness. This node has + /// the same semantics as SCALAR_TO_VECTOR except that the value remains in + /// the aforementioned location in the vector register. + SCALAR_TO_VECTOR_PERMUTED, + // FIXME: Remove these once the ANDI glue bug is fixed: /// i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the /// eq or gt bit of CR0 after executing andi. x, 1. 
This is used to @@ -1215,6 +1223,8 @@ namespace llvm { SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineABS(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineVSelect(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineVectorShuffle(ShuffleVectorSDNode *SVN, + SelectionDAG &DAG) const; SDValue combineVReverseMemOP(ShuffleVectorSDNode *SVN, LSBaseSDNode *LSBase, DAGCombinerInfo &DCI) const; diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index e7ec1808ec3b..c43b2716cb37 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -138,6 +138,8 @@ def PPCldvsxlh : SDNode<"PPCISD::LD_VSX_LH", SDT_PPCldvsxlh, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def PPCldsplat : SDNode<"PPCISD::LD_SPLAT", SDT_PPCldsplat, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def PPCSToV : SDNode<"PPCISD::SCALAR_TO_VECTOR_PERMUTED", + SDTypeProfile<1, 1, []>, []>; //-------------------------- Predicate definitions ---------------------------// def HasVSX : Predicate<"PPCSubTarget->hasVSX()">; @@ -288,6 +290,11 @@ class X_XS6_RA5_RB5 opcode, bits<10> xo, string opc, } // Predicates = HasP9Vector } // AddedComplexity = 400, hasSideEffects = 0 +multiclass ScalToVecWPermute { + def : Pat<(Ty (scalar_to_vector In)), (Ty NonPermOut)>; + def : Pat<(Ty (PPCSToV In)), (Ty PermOut)>; +} + //-------------------------- Instruction definitions -------------------------// // VSX instructions require the VSX feature, they are to be selected over // equivalent Altivec patterns (as they address a larger register set) and @@ -2710,12 +2717,14 @@ def : Pat<(v2i64 (build_vector DblToLong.A, DblToLong.A)), def : Pat<(v2i64 (build_vector DblToULong.A, DblToULong.A)), (v2i64 (XXPERMDI (COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC), (COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC), 0))>; -def : Pat<(v4i32 (scalar_to_vector FltToIntLoad.A)), - (v4i32 (XXSPLTW (COPY_TO_REGCLASS - (XSCVDPSXWSs (XFLOADf32 
xoaddr:$A)), VSRC), 1))>; -def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)), - (v4i32 (XXSPLTW (COPY_TO_REGCLASS - (XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>; +defm : ScalToVecWPermute< + v4i32, FltToIntLoad.A, + (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1), + (COPY_TO_REGCLASS (XSCVDPSXWSs (XFLOADf32 xoaddr:$A)), VSRC)>; +defm : ScalToVecWPermute< + v4i32, FltToUIntLoad.A, + (XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1), + (COPY_TO_REGCLASS (XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), VSRC)>; def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)), (v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>; def : Pat<(v2f64 (PPCldsplat xoaddr:$A)), @@ -2730,10 +2739,12 @@ def : Pat<(v2i64 (build_vector FltToLong.A, FltToLong.A)), def : Pat<(v2i64 (build_vector FltToULong.A, FltToULong.A)), (v2i64 (XXPERMDIs (COPY_TO_REGCLASS (XSCVDPUXDSs $A), VSFRC), 0))>; -def : Pat<(v2i64 (scalar_to_vector DblToLongLoad.A)), - (v2i64 (XVCVDPSXDS (LXVDSX xoaddr:$A)))>; -def : Pat<(v2i64 (scalar_to_vector DblToULongLoad.A)), - (v2i64 (XVCVDPUXDS (LXVDSX xoaddr:$A)))>; +defm : ScalToVecWPermute< + v2i64, DblToLongLoad.A, + (XVCVDPSXDS (LXVDSX xoaddr:$A)), (XVCVDPSXDS (LXVDSX xoaddr:$A))>; +defm : ScalToVecWPermute< + v2i64, DblToULongLoad.A, + (XVCVDPUXDS (LXVDSX xoaddr:$A)), (XVCVDPUXDS (LXVDSX xoaddr:$A))>; } // HasVSX // Any big endian VSX subtarget. @@ -2831,9 +2842,10 @@ def : Pat; +defm : ScalToVecWPermute; def : Pat<(f64 (extractelt v2f64:$S, 0)), (f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>; @@ -2943,18 +2955,24 @@ def : Pat<(PPCstore_scal_int_from_vsr (STXSDX (XSCVDPUXDS f64:$src), xoaddr:$dst)>; // Load-and-splat with fp-to-int conversion (using X-Form VSX/FP loads). 
-def : Pat<(v4i32 (scalar_to_vector DblToIntLoad.A)), - (v4i32 (XXSPLTW (COPY_TO_REGCLASS - (XSCVDPSXWS (XFLOADf64 xoaddr:$A)), VSRC), 1))>; -def : Pat<(v4i32 (scalar_to_vector DblToUIntLoad.A)), - (v4i32 (XXSPLTW (COPY_TO_REGCLASS - (XSCVDPUXWS (XFLOADf64 xoaddr:$A)), VSRC), 1))>; -def : Pat<(v2i64 (scalar_to_vector FltToLongLoad.A)), - (v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS - (XFLOADf32 xoaddr:$A), VSFRC)), 0))>; -def : Pat<(v2i64 (scalar_to_vector FltToULongLoad.A)), - (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS - (XFLOADf32 xoaddr:$A), VSFRC)), 0))>; +defm : ScalToVecWPermute< + v4i32, DblToIntLoad.A, + (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWS (XFLOADf64 xoaddr:$A)), VSRC), 1), + (COPY_TO_REGCLASS (XSCVDPSXWS (XFLOADf64 xoaddr:$A)), VSRC)>; +defm : ScalToVecWPermute< + v4i32, DblToUIntLoad.A, + (XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWS (XFLOADf64 xoaddr:$A)), VSRC), 1), + (COPY_TO_REGCLASS (XSCVDPUXWS (XFLOADf64 xoaddr:$A)), VSRC)>; +defm : ScalToVecWPermute< + v2i64, FltToLongLoad.A, + (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$A), VSFRC)), 0), + (SUBREG_TO_REG (i64 1), (XSCVDPSXDS (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$A), + VSFRC)), sub_64)>; +defm : ScalToVecWPermute< + v2i64, FltToULongLoad.A, + (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$A), VSFRC)), 0), + (SUBREG_TO_REG (i64 1), (XSCVDPUXDS (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$A), + VSFRC)), sub_64)>; } // HasVSX, NoP9Vector // Any VSX subtarget that only has loads and stores that load in big endian @@ -3156,8 +3174,12 @@ def : Pat; // v4f32 scalar <-> vector conversions (LE) -def : Pat<(v4f32 (scalar_to_vector f32:$A)), - (v4f32 (XXSLDWI (XSCVDPSPN $A), (XSCVDPSPN $A), 1))>; + // The permuted version is no better than the version that puts the value + // into the right element because XSCVDPSPN is different from all the other + // instructions used for PPCSToV. 
+ defm : ScalToVecWPermute; def : Pat<(f32 (vector_extract v4f32:$S, 0)), (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>; def : Pat<(f32 (vector_extract v4f32:$S, 1)), @@ -3189,18 +3211,25 @@ def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))), // LIWAX - This instruction is used for sign extending i32 -> i64. // LIWZX - This instruction will be emitted for i32, f32, and when // zero-extending i32 to i64 (zext i32 -> i64). -def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 xoaddr:$src)))), - (v2i64 (XXPERMDIs - (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSFRC), 2))>; -def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))), - (v2i64 (XXPERMDIs - (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSFRC), 2))>; -def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))), - (v4i32 (XXPERMDIs - (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSFRC), 2))>; -def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))), - (v4f32 (XXPERMDIs - (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSFRC), 2))>; +defm : ScalToVecWPermute< + v2i64, (i64 (sextloadi32 xoaddr:$src)), + (XXPERMDIs (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSFRC), 2), + (SUBREG_TO_REG (i64 1), (LIWAX xoaddr:$src), sub_64)>; + +defm : ScalToVecWPermute< + v2i64, (i64 (zextloadi32 xoaddr:$src)), + (XXPERMDIs (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSFRC), 2), + (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$src), sub_64)>; + +defm : ScalToVecWPermute< + v4i32, (i32 (load xoaddr:$src)), + (XXPERMDIs (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSFRC), 2), + (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$src), sub_64)>; + +defm : ScalToVecWPermute< + v4f32, (f32 (load xoaddr:$src)), + (XXPERMDIs (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSFRC), 2), + (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$src), sub_64)>; def : Pat vector conversions (LE) - def : Pat<(v16i8 (scalar_to_vector i32:$A)), - (v16i8 (COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC))>; - def : Pat<(v8i16 (scalar_to_vector i32:$A)), - (v8i16 (COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, 
VSRC))>; - def : Pat<(v4i32 (scalar_to_vector i32:$A)), - (v4i32 MovesToVSR.LE_WORD_0)>; - def : Pat<(v2i64 (scalar_to_vector i64:$A)), - (v2i64 MovesToVSR.LE_DWORD_0)>; + defm : ScalToVecWPermute; + defm : ScalToVecWPermute; + defm : ScalToVecWPermute; + defm : ScalToVecWPermute; + // v2i64 scalar <-> vector conversions (LE) def : Pat<(i64 (vector_extract v2i64:$S, 0)), (i64 VectorExtractions.LE_DWORD_0)>; @@ -3641,30 +3673,41 @@ def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>; // Build vectors from i8 loads -def : Pat<(v16i8 (scalar_to_vector ScalarLoads.Li8)), - (v16i8 (VSPLTBs 7, (LXSIBZX xoaddr:$src)))>; -def : Pat<(v8i16 (scalar_to_vector ScalarLoads.ZELi8)), - (v8i16 (VSPLTHs 3, (LXSIBZX xoaddr:$src)))>; -def : Pat<(v4i32 (scalar_to_vector ScalarLoads.ZELi8)), - (v4i32 (XXSPLTWs (LXSIBZX xoaddr:$src), 1))>; -def : Pat<(v2i64 (scalar_to_vector ScalarLoads.ZELi8i64)), - (v2i64 (XXPERMDIs (LXSIBZX xoaddr:$src), 0))>; -def : Pat<(v4i32 (scalar_to_vector ScalarLoads.SELi8)), - (v4i32 (XXSPLTWs (VEXTSB2Ws (LXSIBZX xoaddr:$src)), 1))>; -def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi8i64)), - (v2i64 (XXPERMDIs (VEXTSB2Ds (LXSIBZX xoaddr:$src)), 0))>; +defm : ScalToVecWPermute; +defm : ScalToVecWPermute; +defm : ScalToVecWPermute; +defm : ScalToVecWPermute; +defm : ScalToVecWPermute; +defm : ScalToVecWPermute; // Build vectors from i16 loads -def : Pat<(v8i16 (scalar_to_vector ScalarLoads.Li16)), - (v8i16 (VSPLTHs 3, (LXSIHZX xoaddr:$src)))>; -def : Pat<(v4i32 (scalar_to_vector ScalarLoads.ZELi16)), - (v4i32 (XXSPLTWs (LXSIHZX xoaddr:$src), 1))>; -def : Pat<(v2i64 (scalar_to_vector ScalarLoads.ZELi16i64)), - (v2i64 (XXPERMDIs (LXSIHZX xoaddr:$src), 0))>; -def : Pat<(v4i32 (scalar_to_vector ScalarLoads.SELi16)), - (v4i32 (XXSPLTWs (VEXTSH2Ws (LXSIHZX xoaddr:$src)), 1))>; -def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi16i64)), - (v2i64 (XXPERMDIs (VEXTSH2Ds (LXSIHZX xoaddr:$src)), 0))>; +defm : ScalToVecWPermute; +defm : 
ScalToVecWPermute; +defm : ScalToVecWPermute; +defm : ScalToVecWPermute; +defm : ScalToVecWPermute; // Load/convert and convert/store patterns for f16. def : Pat<(f64 (extloadf16 xoaddr:$src)), @@ -3806,8 +3849,7 @@ def : Pat<(f32 (PPCxsminc f32:$XA, f32:$XB)), VSSRC))>; // Endianness-neutral patterns for const splats with ISA 3.0 instructions. -def : Pat<(v4i32 (scalar_to_vector i32:$A)), - (v4i32 (MTVSRWS $A))>; +defm : ScalToVecWPermute; def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)), (v4i32 (MTVSRWS $A))>; def : Pat<(v16i8 (build_vector immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A, @@ -3819,24 +3861,32 @@ def : Pat<(v16i8 (build_vector immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A)), (v16i8 (COPY_TO_REGCLASS (XXSPLTIB imm:$A), VSRC))>; -def : Pat<(v4i32 (scalar_to_vector FltToIntLoad.A)), - (v4i32 (XVCVSPSXWS (LXVWSX xoaddr:$A)))>; -def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)), - (v4i32 (XVCVSPUXWS (LXVWSX xoaddr:$A)))>; -def : Pat<(v4i32 (scalar_to_vector DblToIntLoadP9.A)), - (v4i32 (XXSPLTW (COPY_TO_REGCLASS - (XSCVDPSXWS (DFLOADf64 iaddrX4:$A)), VSRC), 1))>; -def : Pat<(v4i32 (scalar_to_vector DblToUIntLoadP9.A)), - (v4i32 (XXSPLTW (COPY_TO_REGCLASS - (XSCVDPUXWS (DFLOADf64 iaddrX4:$A)), VSRC), 1))>; -def : Pat<(v2i64 (scalar_to_vector FltToLongLoadP9.A)), - (v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS - (DFLOADf32 iaddrX4:$A), - VSFRC)), 0))>; -def : Pat<(v2i64 (scalar_to_vector FltToULongLoadP9.A)), - (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS - (DFLOADf32 iaddrX4:$A), - VSFRC)), 0))>; +defm : ScalToVecWPermute; +defm : ScalToVecWPermute; +defm : ScalToVecWPermute< + v4i32, DblToIntLoadP9.A, + (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWS (DFLOADf64 iaddrX4:$A)), VSRC), 1), + (SUBREG_TO_REG (i64 1), (XSCVDPSXWS (DFLOADf64 iaddrX4:$A)), sub_64)>; +defm : ScalToVecWPermute< + v4i32, DblToUIntLoadP9.A, + (XXSPLTW (COPY_TO_REGCLASS 
(XSCVDPUXWS (DFLOADf64 iaddrX4:$A)), VSRC), 1), + (SUBREG_TO_REG (i64 1), (XSCVDPUXWS (DFLOADf64 iaddrX4:$A)), sub_64)>; +defm : ScalToVecWPermute< + v2i64, FltToLongLoadP9.A, + (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$A), VSFRC)), 0), + (SUBREG_TO_REG + (i64 1), + (XSCVDPSXDS (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$A), VSFRC)), sub_64)>; +defm : ScalToVecWPermute< + v2i64, FltToULongLoadP9.A, + (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$A), VSFRC)), 0), + (SUBREG_TO_REG + (i64 1), + (XSCVDPUXDS (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$A), VSFRC)), sub_64)>; def : Pat<(v4f32 (PPCldsplat xoaddr:$A)), (v4f32 (LXVWSX xoaddr:$A))>; def : Pat<(v4i32 (PPCldsplat xoaddr:$A)), @@ -4116,19 +4166,23 @@ def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst), def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst), (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>; -def : Pat<(v2i64 (scalar_to_vector (i64 (load iaddrX4:$src)))), - (v2i64 (XXPERMDIs - (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSFRC), 2))>; -def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddrX4:$src)))), - (v2i64 (XXPERMDIs - (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSFRC), 2))>; +defm : ScalToVecWPermute< + v2i64, (i64 (load iaddrX4:$src)), + (XXPERMDIs (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSFRC), 2), + (SUBREG_TO_REG (i64 1), (DFLOADf64 iaddrX4:$src), sub_64)>; +defm : ScalToVecWPermute< + v2i64, (i64 (load xaddrX4:$src)), + (XXPERMDIs (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSFRC), 2), + (SUBREG_TO_REG (i64 1), (XFLOADf64 xaddrX4:$src), sub_64)>; +defm : ScalToVecWPermute< + v2f64, (f64 (load iaddrX4:$src)), + (XXPERMDIs (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSFRC), 2), + (SUBREG_TO_REG (i64 1), (DFLOADf64 iaddrX4:$src), sub_64)>; +defm : ScalToVecWPermute< + v2f64, (f64 (load xaddrX4:$src)), + (XXPERMDIs (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSFRC), 2), + (SUBREG_TO_REG (i64 1), 
(XFLOADf64 xaddrX4:$src), sub_64)>; -def : Pat<(v2f64 (scalar_to_vector (f64 (load iaddrX4:$src)))), - (v2f64 (XXPERMDIs - (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSFRC), 2))>; -def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddrX4:$src)))), - (v2f64 (XXPERMDIs - (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSFRC), 2))>; def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddrX4:$src), (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64), xaddrX4:$src)>; diff --git a/llvm/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll b/llvm/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll index 8c9ffa815467..4d06571d0ec7 100644 --- a/llvm/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll +++ b/llvm/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll @@ -13,8 +13,7 @@ define void @testExpandPostRAPseudo(i32* nocapture readonly %ptr) { ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8: lfiwzx f0, 0, r3 ; CHECK-P8: ld r4, .LC0@toc@l(r4) -; CHECK-P8: xxswapd vs0, f0 -; CHECK-P8: xxspltw v2, vs0, 3 +; CHECK-P8: xxspltw v2, vs0, 1 ; CHECK-P8: stvx v2, 0, r4 ; CHECK-P8: lis r4, 1024 ; CHECK-P8: lfiwax f0, 0, r3 diff --git a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll index ee0cc41ea6bd..1cb7d7b62055 100644 --- a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll +++ b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll @@ -1282,8 +1282,7 @@ define <4 x i32> @spltMemVali(i32* nocapture readonly %ptr) { ; P8LE-LABEL: spltMemVali: ; P8LE: # %bb.0: # %entry ; P8LE-NEXT: lfiwzx f0, 0, r3 -; P8LE-NEXT: xxswapd vs0, f0 -; P8LE-NEXT: xxspltw v2, vs0, 3 +; P8LE-NEXT: xxspltw v2, vs0, 1 ; P8LE-NEXT: blr entry: %0 = load i32, i32* %ptr, align 4 @@ -2801,8 +2800,7 @@ define <4 x i32> @spltMemValui(i32* nocapture readonly %ptr) { ; P8LE-LABEL: spltMemValui: ; P8LE: # %bb.0: # %entry ; P8LE-NEXT: lfiwzx f0, 0, r3 -; P8LE-NEXT: xxswapd vs0, f0 -; P8LE-NEXT: xxspltw v2, vs0, 3 +; P8LE-NEXT: xxspltw v2, vs0, 1 ; P8LE-NEXT: blr entry: %0 = load i32, i32* %ptr, align 4 @@ -4573,7 +4571,7 @@ 
define <2 x i64> @spltMemValConvftoll(float* nocapture readonly %ptr) { ; P9LE: # %bb.0: # %entry ; P9LE-NEXT: lfs f0, 0(r3) ; P9LE-NEXT: xscvdpsxds f0, f0 -; P9LE-NEXT: xxspltd v2, f0, 0 +; P9LE-NEXT: xxspltd v2, vs0, 0 ; P9LE-NEXT: blr ; ; P8BE-LABEL: spltMemValConvftoll: @@ -4587,7 +4585,7 @@ define <2 x i64> @spltMemValConvftoll(float* nocapture readonly %ptr) { ; P8LE: # %bb.0: # %entry ; P8LE-NEXT: lfsx f0, 0, r3 ; P8LE-NEXT: xscvdpsxds f0, f0 -; P8LE-NEXT: xxspltd v2, f0, 0 +; P8LE-NEXT: xxspltd v2, vs0, 0 ; P8LE-NEXT: blr entry: %0 = load float, float* %ptr, align 4 @@ -5761,7 +5759,7 @@ define <2 x i64> @spltMemValConvftoull(float* nocapture readonly %ptr) { ; P9LE: # %bb.0: # %entry ; P9LE-NEXT: lfs f0, 0(r3) ; P9LE-NEXT: xscvdpuxds f0, f0 -; P9LE-NEXT: xxspltd v2, f0, 0 +; P9LE-NEXT: xxspltd v2, vs0, 0 ; P9LE-NEXT: blr ; ; P8BE-LABEL: spltMemValConvftoull: @@ -5775,7 +5773,7 @@ define <2 x i64> @spltMemValConvftoull(float* nocapture readonly %ptr) { ; P8LE: # %bb.0: # %entry ; P8LE-NEXT: lfsx f0, 0, r3 ; P8LE-NEXT: xscvdpuxds f0, f0 -; P8LE-NEXT: xxspltd v2, f0, 0 +; P8LE-NEXT: xxspltd v2, vs0, 0 ; P8LE-NEXT: blr entry: %0 = load float, float* %ptr, align 4 diff --git a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll index 2ffe98e1f694..7fac0511e3c5 100644 --- a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll +++ b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll @@ -23,18 +23,12 @@ entry: define dso_local <16 x i8> @testmrghb2(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 { ; CHECK-P8-LABEL: testmrghb2: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: addis r3, r2, .LCPI1_0@toc@ha -; CHECK-P8-NEXT: addi r3, r3, .LCPI1_0@toc@l -; CHECK-P8-NEXT: lvx v4, 0, r3 -; CHECK-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-P8-NEXT: vmrghb v2, v2, v3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: testmrghb2: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: addis r3, r2, .LCPI1_0@toc@ha -; CHECK-P9-NEXT: 
addi r3, r3, .LCPI1_0@toc@l -; CHECK-P9-NEXT: lxvx v4, 0, r3 -; CHECK-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-P9-NEXT: vmrghb v2, v2, v3 ; CHECK-P9-NEXT: blr entry: %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -57,18 +51,12 @@ entry: define dso_local <16 x i8> @testmrghh2(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 { ; CHECK-P8-LABEL: testmrghh2: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: addis r3, r2, .LCPI3_0@toc@ha -; CHECK-P8-NEXT: addi r3, r3, .LCPI3_0@toc@l -; CHECK-P8-NEXT: lvx v4, 0, r3 -; CHECK-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-P8-NEXT: vmrghh v2, v2, v3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: testmrghh2: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: addis r3, r2, .LCPI3_0@toc@ha -; CHECK-P9-NEXT: addi r3, r3, .LCPI3_0@toc@l -; CHECK-P9-NEXT: lxvx v4, 0, r3 -; CHECK-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-P9-NEXT: vmrghh v2, v2, v3 ; CHECK-P9-NEXT: blr entry: %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -91,18 +79,12 @@ entry: define dso_local <16 x i8> @testmrglb2(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 { ; CHECK-P8-LABEL: testmrglb2: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: addis r3, r2, .LCPI5_0@toc@ha -; CHECK-P8-NEXT: addi r3, r3, .LCPI5_0@toc@l -; CHECK-P8-NEXT: lvx v4, 0, r3 -; CHECK-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-P8-NEXT: vmrglb v2, v2, v3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: testmrglb2: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: addis r3, r2, .LCPI5_0@toc@ha -; CHECK-P9-NEXT: addi r3, r3, .LCPI5_0@toc@l -; CHECK-P9-NEXT: lxvx v4, 0, r3 -; CHECK-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-P9-NEXT: vmrglb v2, v2, v3 ; CHECK-P9-NEXT: blr entry: %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -125,18 +107,12 @@ entry: define dso_local <16 x i8> @testmrglh2(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 { ; CHECK-P8-LABEL: testmrglh2: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: addis r3, r2, .LCPI7_0@toc@ha -; CHECK-P8-NEXT: addi r3, r3, 
.LCPI7_0@toc@l -; CHECK-P8-NEXT: lvx v4, 0, r3 -; CHECK-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-P8-NEXT: vmrglh v2, v2, v3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: testmrglh2: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: addis r3, r2, .LCPI7_0@toc@ha -; CHECK-P9-NEXT: addi r3, r3, .LCPI7_0@toc@l -; CHECK-P9-NEXT: lxvx v4, 0, r3 -; CHECK-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-P9-NEXT: vmrglh v2, v2, v3 ; CHECK-P9-NEXT: blr entry: %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -159,18 +135,12 @@ entry: define dso_local <16 x i8> @testmrghw2(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 { ; CHECK-P8-LABEL: testmrghw2: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: addis r3, r2, .LCPI9_0@toc@ha -; CHECK-P8-NEXT: addi r3, r3, .LCPI9_0@toc@l -; CHECK-P8-NEXT: lvx v4, 0, r3 -; CHECK-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-P8-NEXT: vmrghw v2, v2, v3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: testmrghw2: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: addis r3, r2, .LCPI9_0@toc@ha -; CHECK-P9-NEXT: addi r3, r3, .LCPI9_0@toc@l -; CHECK-P9-NEXT: lxvx v4, 0, r3 -; CHECK-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-P9-NEXT: vmrghw v2, v2, v3 ; CHECK-P9-NEXT: blr entry: %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -193,18 +163,12 @@ entry: define dso_local <16 x i8> @testmrglw2(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 { ; CHECK-P8-LABEL: testmrglw2: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: addis r3, r2, .LCPI11_0@toc@ha -; CHECK-P8-NEXT: addi r3, r3, .LCPI11_0@toc@l -; CHECK-P8-NEXT: lvx v4, 0, r3 -; CHECK-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-P8-NEXT: vmrglw v2, v2, v3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: testmrglw2: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: addis r3, r2, .LCPI11_0@toc@ha -; CHECK-P9-NEXT: addi r3, r3, .LCPI11_0@toc@l -; CHECK-P9-NEXT: lxvx v4, 0, r3 -; CHECK-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-P9-NEXT: vmrglw v2, v2, v3 ; CHECK-P9-NEXT: blr entry: %shuffle = shufflevector <16 x i8> %a, <16 x 
i8> %b, <16 x i32> @@ -215,24 +179,16 @@ define dso_local <8 x i16> @testmrglb3(<8 x i8>* nocapture readonly %a) local_un ; CHECK-P8-LABEL: testmrglb3: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: ld r3, 0(r3) -; CHECK-P8-NEXT: addis r4, r2, .LCPI12_0@toc@ha -; CHECK-P8-NEXT: xxlxor v4, v4, v4 -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: addi r3, r4, .LCPI12_0@toc@l -; CHECK-P8-NEXT: lvx v3, 0, r3 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-P8-NEXT: xxlxor v2, v2, v2 +; CHECK-P8-NEXT: mtvsrd v3, r3 +; CHECK-P8-NEXT: vmrghb v2, v2, v3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: testmrglb3: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lfd f0, 0(r3) -; CHECK-P9-NEXT: addis r3, r2, .LCPI12_0@toc@ha -; CHECK-P9-NEXT: addi r3, r3, .LCPI12_0@toc@l -; CHECK-P9-NEXT: lxvx v3, 0, r3 -; CHECK-P9-NEXT: xxswapd v2, f0 -; CHECK-P9-NEXT: xxlxor v4, v4, v4 -; CHECK-P9-NEXT: vperm v2, v2, v4, v3 +; CHECK-P9-NEXT: lxsd v2, 0(r3) +; CHECK-P9-NEXT: xxlxor v3, v3, v3 +; CHECK-P9-NEXT: vmrghb v2, v3, v2 ; CHECK-P9-NEXT: blr entry: %0 = load <8 x i8>, <8 x i8>* %a, align 8 diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-round.ll b/llvm/test/CodeGen/PowerPC/fp-strict-round.ll index a23db59635a4..3a43b3584caf 100644 --- a/llvm/test/CodeGen/PowerPC/fp-strict-round.ll +++ b/llvm/test/CodeGen/PowerPC/fp-strict-round.ll @@ -331,12 +331,12 @@ define <2 x float> @fptrunc_v2f32_v2f64(<2 x double> %vf1) { ; P9: # %bb.0: ; P9-NEXT: xsrsp f0, v2 ; P9-NEXT: xscvdpspn vs0, f0 -; P9-NEXT: xxsldwi v3, vs0, vs0, 1 +; P9-NEXT: xxsldwi v3, vs0, vs0, 3 ; P9-NEXT: xxswapd vs0, v2 ; P9-NEXT: xsrsp f0, f0 ; P9-NEXT: xscvdpspn vs0, f0 -; P9-NEXT: xxsldwi v2, vs0, vs0, 1 -; P9-NEXT: vmrglw v2, v3, v2 +; P9-NEXT: xxsldwi v2, vs0, vs0, 3 +; P9-NEXT: vmrghw v2, v3, v2 ; P9-NEXT: blr %res = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64( <2 x double> %vf1, diff --git a/llvm/test/CodeGen/PowerPC/load-and-splat.ll 
b/llvm/test/CodeGen/PowerPC/load-and-splat.ll index f411712ba3fa..26da1fdaefef 100644 --- a/llvm/test/CodeGen/PowerPC/load-and-splat.ll +++ b/llvm/test/CodeGen/PowerPC/load-and-splat.ll @@ -40,8 +40,7 @@ define dso_local void @test2(<4 x float>* nocapture %c, float* nocapture readonl ; P8: # %bb.0: # %entry ; P8-NEXT: addi r4, r4, 12 ; P8-NEXT: lfiwzx f0, 0, r4 -; P8-NEXT: xxswapd vs0, f0 -; P8-NEXT: xxspltw v2, vs0, 3 +; P8-NEXT: xxspltw v2, vs0, 1 ; P8-NEXT: stvx v2, 0, r3 ; P8-NEXT: blr entry: @@ -65,8 +64,7 @@ define dso_local void @test3(<4 x i32>* nocapture %c, i32* nocapture readonly %a ; P8: # %bb.0: # %entry ; P8-NEXT: addi r4, r4, 12 ; P8-NEXT: lfiwzx f0, 0, r4 -; P8-NEXT: xxswapd vs0, f0 -; P8-NEXT: xxspltw v2, vs0, 3 +; P8-NEXT: xxspltw v2, vs0, 1 ; P8-NEXT: stvx v2, 0, r3 ; P8-NEXT: blr entry: @@ -110,8 +108,7 @@ define <16 x i8> @unadjusted_lxvwsx(i32* %s, i32* %t) { ; P8-LABEL: unadjusted_lxvwsx: ; P8: # %bb.0: # %entry ; P8-NEXT: lfiwzx f0, 0, r3 -; P8-NEXT: xxswapd vs0, f0 -; P8-NEXT: xxspltw v2, vs0, 3 +; P8-NEXT: xxspltw v2, vs0, 1 ; P8-NEXT: blr entry: %0 = bitcast i32* %s to <4 x i8>* @@ -131,8 +128,7 @@ define <16 x i8> @adjusted_lxvwsx(i64* %s, i64* %t) { ; P8: # %bb.0: # %entry ; P8-NEXT: ld r3, 0(r3) ; P8-NEXT: mtfprd f0, r3 -; P8-NEXT: xxswapd v2, vs0 -; P8-NEXT: xxspltw v2, v2, 2 +; P8-NEXT: xxspltw v2, vs0, 0 ; P8-NEXT: blr entry: %0 = bitcast i64* %s to <8 x i8>* diff --git a/llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll b/llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll index 409978549c36..a03ab5f9519e 100644 --- a/llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll +++ b/llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll @@ -9,8 +9,7 @@ define <16 x i8> @test(i32* %s, i32* %t) { ; CHECK-LE-LABEL: test: ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: lfiwzx f0, 0, r3 -; CHECK-LE-NEXT: xxswapd vs0, f0 -; CHECK-LE-NEXT: xxspltw v2, vs0, 3 +; CHECK-LE-NEXT: xxspltw v2, vs0, 1 ; CHECK-LE-NEXT: blr ; CHECK-LABEL: test: diff --git 
a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll index e1f0e827b9f6..dffa0fb98fc0 100644 --- a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll +++ b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll @@ -21,8 +21,8 @@ entry: ; CHECK: sldi r3, r3, 56 ; CHECK: mtvsrd v2, r3 ; CHECK-LE-LABEL: buildc -; CHECK-LE: mtfprd f0, r3 -; CHECK-LE: xxswapd v2, vs0 +; CHECK-LE: mtvsrd v2, r3 +; CHECK-LE: vspltb v2, v2, 7 } ; Function Attrs: norecurse nounwind readnone @@ -35,8 +35,8 @@ entry: ; CHECK: sldi r3, r3, 48 ; CHECK: mtvsrd v2, r3 ; CHECK-LE-LABEL: builds -; CHECK-LE: mtfprd f0, r3 -; CHECK-LE: xxswapd v2, vs0 +; CHECK-LE: mtvsrd v2, r3 +; CHECK-LE: vsplth v2, v2, 3 } ; Function Attrs: norecurse nounwind readnone diff --git a/llvm/test/CodeGen/PowerPC/pr25080.ll b/llvm/test/CodeGen/PowerPC/pr25080.ll index 7a2fb76fd453..f87cb5b940ca 100644 --- a/llvm/test/CodeGen/PowerPC/pr25080.ll +++ b/llvm/test/CodeGen/PowerPC/pr25080.ll @@ -17,41 +17,33 @@ define <8 x i16> @pr25080(<8 x i32> %a) { ; LE-NEXT: mfvsrwz 3, 34 ; LE-NEXT: xxsldwi 1, 34, 34, 1 ; LE-NEXT: mfvsrwz 4, 35 -; LE-NEXT: xxsldwi 4, 34, 34, 3 -; LE-NEXT: mtfprd 2, 3 +; LE-NEXT: xxsldwi 2, 34, 34, 3 +; LE-NEXT: mtvsrd 36, 3 ; LE-NEXT: mffprwz 3, 0 ; LE-NEXT: xxswapd 0, 35 -; LE-NEXT: mtfprd 3, 4 -; LE-NEXT: xxsldwi 5, 35, 35, 1 +; LE-NEXT: mtvsrd 37, 4 ; LE-NEXT: mffprwz 4, 1 -; LE-NEXT: xxsldwi 7, 35, 35, 3 -; LE-NEXT: mtfprd 1, 3 -; LE-NEXT: xxswapd 33, 3 -; LE-NEXT: mffprwz 3, 4 -; LE-NEXT: mtfprd 4, 4 -; LE-NEXT: xxswapd 34, 1 +; LE-NEXT: xxsldwi 1, 35, 35, 1 +; LE-NEXT: mtvsrd 34, 3 +; LE-NEXT: mffprwz 3, 2 +; LE-NEXT: mtvsrd 32, 4 ; LE-NEXT: mffprwz 4, 0 -; LE-NEXT: mtfprd 0, 3 -; LE-NEXT: xxswapd 35, 4 -; LE-NEXT: mffprwz 3, 5 -; LE-NEXT: mtfprd 6, 4 -; LE-NEXT: xxswapd 36, 0 -; LE-NEXT: mtfprd 1, 3 -; LE-NEXT: mffprwz 3, 7 -; LE-NEXT: xxswapd 37, 6 -; LE-NEXT: vmrglh 2, 3, 2 -; LE-NEXT: xxswapd 35, 2 -; LE-NEXT: mtfprd 2, 
3 -; LE-NEXT: xxswapd 32, 1 +; LE-NEXT: xxsldwi 0, 35, 35, 3 +; LE-NEXT: mtvsrd 33, 3 +; LE-NEXT: mffprwz 3, 1 +; LE-NEXT: mtvsrd 38, 4 +; LE-NEXT: mtvsrd 35, 3 +; LE-NEXT: mffprwz 3, 0 +; LE-NEXT: vmrghh 2, 0, 2 +; LE-NEXT: mtvsrd 32, 3 ; LE-NEXT: addis 3, 2, .LCPI0_1@toc@ha +; LE-NEXT: vmrghh 4, 1, 4 ; LE-NEXT: addi 3, 3, .LCPI0_1@toc@l -; LE-NEXT: xxswapd 38, 2 -; LE-NEXT: vmrglh 3, 4, 3 -; LE-NEXT: vmrglh 4, 0, 5 -; LE-NEXT: vmrglh 5, 6, 1 -; LE-NEXT: vmrglw 2, 3, 2 -; LE-NEXT: vmrglw 3, 5, 4 +; LE-NEXT: vmrghh 3, 3, 6 +; LE-NEXT: vmrghh 5, 0, 5 +; LE-NEXT: vmrglw 2, 4, 2 ; LE-NEXT: vspltish 4, 15 +; LE-NEXT: vmrglw 3, 5, 3 ; LE-NEXT: xxmrgld 34, 35, 34 ; LE-NEXT: lvx 3, 0, 3 ; LE-NEXT: xxlor 34, 34, 35 diff --git a/llvm/test/CodeGen/PowerPC/pr25157-peephole.ll b/llvm/test/CodeGen/PowerPC/pr25157-peephole.ll index 4c10c3813fb5..d3bfb910fc9f 100644 --- a/llvm/test/CodeGen/PowerPC/pr25157-peephole.ll +++ b/llvm/test/CodeGen/PowerPC/pr25157-peephole.ll @@ -58,12 +58,11 @@ L.LB38_2452: ; CHECK-LABEL: @aercalc_ ; CHECK: lfs -; CHECK: xxspltd +; CHECK: xxswapd ; CHECK: stxvd2x ; CHECK-NOT: xxswapd ; CHECK-P9-LABEL: @aercalc_ ; CHECK-P9: lfs -; CHECK-P9: xxspltd ; CHECK-P9: stxv ; CHECK-P9-NOT: xxswapd diff --git a/llvm/test/CodeGen/PowerPC/pr38087.ll b/llvm/test/CodeGen/PowerPC/pr38087.ll index e05a3d2b97aa..49b3d39bc18c 100644 --- a/llvm/test/CodeGen/PowerPC/pr38087.ll +++ b/llvm/test/CodeGen/PowerPC/pr38087.ll @@ -11,9 +11,8 @@ declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #0 define void @draw_llvm_vs_variant0(<4 x float> %x) { ; CHECK-LABEL: draw_llvm_vs_variant0: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lfd f0, 0(r3) -; CHECK-NEXT: xxswapd v3, f0 -; CHECK-NEXT: vmrglh v3, v3, v3 +; CHECK-NEXT: lxsd v3, 0(r3) +; CHECK-NEXT: vmrghh v3, v3, v3 ; CHECK-NEXT: vextsh2w v3, v3 ; CHECK-NEXT: xvcvsxwsp vs0, v3 ; CHECK-NEXT: xxspltw vs0, vs0, 2 diff --git a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll index 
4c9137d86124..6584cb74bdb5 100644 --- a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll +++ b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll @@ -11,34 +11,31 @@ define signext i32 @test_pre_inc_disable_1(i8* nocapture readonly %pix1, i32 signext %i_stride_pix1, i8* nocapture readonly %pix2) { ; CHECK-LABEL: test_pre_inc_disable_1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lfd f0, 0(r5) +; CHECK-NEXT: lxsd v5, 0(r5) ; CHECK-NEXT: addis r5, r2, .LCPI0_0@toc@ha ; CHECK-NEXT: addi r5, r5, .LCPI0_0@toc@l ; CHECK-NEXT: lxvx v2, 0, r5 ; CHECK-NEXT: addis r5, r2, .LCPI0_1@toc@ha ; CHECK-NEXT: addi r5, r5, .LCPI0_1@toc@l ; CHECK-NEXT: lxvx v4, 0, r5 -; CHECK-NEXT: xxswapd v5, f0 -; CHECK-NEXT: xxlxor v3, v3, v3 ; CHECK-NEXT: li r5, 4 +; CHECK-NEXT: xxlxor v3, v3, v3 ; CHECK-NEXT: vperm v0, v3, v5, v2 ; CHECK-NEXT: mtctr r5 ; CHECK-NEXT: li r5, 0 -; CHECK-NEXT: vperm v1, v5, v3, v4 +; CHECK-NEXT: vperm v1, v3, v5, v4 ; CHECK-NEXT: li r6, 0 ; CHECK-NEXT: xvnegsp v5, v0 ; CHECK-NEXT: xvnegsp v0, v1 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_1: # %for.cond1.preheader ; CHECK-NEXT: # -; CHECK-NEXT: lfd f0, 0(r3) -; CHECK-NEXT: xxswapd v1, f0 -; CHECK-NEXT: lfdx f0, r3, r4 -; CHECK-NEXT: vperm v6, v1, v3, v4 +; CHECK-NEXT: lxsd v1, 0(r3) +; CHECK-NEXT: vperm v6, v3, v1, v4 ; CHECK-NEXT: vperm v1, v3, v1, v2 ; CHECK-NEXT: xvnegsp v1, v1 -; CHECK-NEXT: add r7, r3, r4 ; CHECK-NEXT: xvnegsp v6, v6 +; CHECK-NEXT: add r7, r3, r4 ; CHECK-NEXT: vabsduw v1, v1, v5 ; CHECK-NEXT: vabsduw v6, v6, v0 ; CHECK-NEXT: vadduwm v1, v6, v1 @@ -46,15 +43,14 @@ define signext i32 @test_pre_inc_disable_1(i8* nocapture readonly %pix1, i32 sig ; CHECK-NEXT: vadduwm v1, v1, v6 ; CHECK-NEXT: xxspltw v6, v1, 2 ; CHECK-NEXT: vadduwm v1, v1, v6 -; CHECK-NEXT: xxswapd v6, f0 +; CHECK-NEXT: lxsdx v6, r3, r4 ; CHECK-NEXT: vextuwrx r3, r5, v1 -; CHECK-NEXT: vperm v7, v6, v3, v4 +; CHECK-NEXT: vperm v7, v3, v6, v4 ; CHECK-NEXT: vperm v6, v3, v6, v2 -; CHECK-NEXT: add r6, r3, r6 -; CHECK-NEXT: add r3, r7, r4 ; 
CHECK-NEXT: xvnegsp v6, v6 ; CHECK-NEXT: xvnegsp v1, v7 ; CHECK-NEXT: vabsduw v6, v6, v5 +; CHECK-NEXT: add r6, r3, r6 ; CHECK-NEXT: vabsduw v1, v1, v0 ; CHECK-NEXT: vadduwm v1, v1, v6 ; CHECK-NEXT: xxswapd v6, v1 @@ -62,6 +58,7 @@ define signext i32 @test_pre_inc_disable_1(i8* nocapture readonly %pix1, i32 sig ; CHECK-NEXT: xxspltw v6, v1, 2 ; CHECK-NEXT: vadduwm v1, v1, v6 ; CHECK-NEXT: vextuwrx r8, r5, v1 +; CHECK-NEXT: add r3, r7, r4 ; CHECK-NEXT: add r6, r8, r6 ; CHECK-NEXT: bdnz .LBB0_1 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup @@ -181,29 +178,27 @@ for.cond.cleanup: ; preds = %for.cond1.preheader define signext i32 @test_pre_inc_disable_2(i8* nocapture readonly %pix1, i8* nocapture readonly %pix2) { ; CHECK-LABEL: test_pre_inc_disable_2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lfd f0, 0(r3) +; CHECK-NEXT: lxsd v2, 0(r3) ; CHECK-NEXT: addis r3, r2, .LCPI1_0@toc@ha ; CHECK-NEXT: addi r3, r3, .LCPI1_0@toc@l ; CHECK-NEXT: lxvx v4, 0, r3 ; CHECK-NEXT: addis r3, r2, .LCPI1_1@toc@ha -; CHECK-NEXT: xxswapd v2, f0 -; CHECK-NEXT: lfd f0, 0(r4) ; CHECK-NEXT: addi r3, r3, .LCPI1_1@toc@l -; CHECK-NEXT: xxlxor v3, v3, v3 ; CHECK-NEXT: lxvx v0, 0, r3 -; CHECK-NEXT: xxswapd v1, f0 -; CHECK-NEXT: vperm v5, v2, v3, v4 +; CHECK-NEXT: lxsd v1, 0(r4) +; CHECK-NEXT: xxlxor v3, v3, v3 +; CHECK-NEXT: vperm v5, v3, v2, v4 ; CHECK-NEXT: vperm v2, v3, v2, v0 ; CHECK-NEXT: vperm v0, v3, v1, v0 -; CHECK-NEXT: vperm v3, v1, v3, v4 +; CHECK-NEXT: vperm v3, v3, v1, v4 ; CHECK-NEXT: vabsduw v2, v2, v0 ; CHECK-NEXT: vabsduw v3, v5, v3 ; CHECK-NEXT: vadduwm v2, v3, v2 ; CHECK-NEXT: xxswapd v3, v2 -; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: vadduwm v2, v2, v3 ; CHECK-NEXT: xxspltw v3, v2, 2 ; CHECK-NEXT: vadduwm v2, v2, v3 +; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: vextuwrx r3, r3, v2 ; CHECK-NEXT: extsw r3, r3 ; CHECK-NEXT: blr @@ -286,16 +281,14 @@ define void @test32(i8* nocapture readonly %pix2, i32 signext %i_pix2) { ; CHECK-LABEL: test32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: add r5, r3, r4 
-; CHECK-NEXT: lfiwzx f0, r3, r4 +; CHECK-NEXT: lxsiwzx v2, r3, r4 ; CHECK-NEXT: addis r3, r2, .LCPI2_0@toc@ha ; CHECK-NEXT: addi r3, r3, .LCPI2_0@toc@l ; CHECK-NEXT: lxvx v4, 0, r3 ; CHECK-NEXT: li r3, 4 -; CHECK-NEXT: xxswapd v2, f0 -; CHECK-NEXT: lfiwzx f0, r5, r3 +; CHECK-NEXT: lxsiwzx v5, r5, r3 ; CHECK-NEXT: xxlxor v3, v3, v3 ; CHECK-NEXT: vperm v2, v2, v3, v4 -; CHECK-NEXT: xxswapd v5, f0 ; CHECK-NEXT: vperm v3, v5, v3, v4 ; CHECK-NEXT: vspltisw v4, 8 ; CHECK-NEXT: vnegw v3, v3 @@ -361,16 +354,15 @@ define void @test16(i16* nocapture readonly %sums, i32 signext %delta, i32 signe ; CHECK-NEXT: lxsihzx v2, r6, r7 ; CHECK-NEXT: lxsihzx v4, r3, r4 ; CHECK-NEXT: li r6, 0 -; CHECK-NEXT: mtfprd f0, r6 +; CHECK-NEXT: mtvsrd v3, r6 ; CHECK-NEXT: vsplth v4, v4, 3 -; CHECK-NEXT: xxswapd v3, vs0 ; CHECK-NEXT: vsplth v2, v2, 3 ; CHECK-NEXT: addis r3, r2, .LCPI3_0@toc@ha ; CHECK-NEXT: addi r3, r3, .LCPI3_0@toc@l -; CHECK-NEXT: vmrglh v2, v3, v2 -; CHECK-NEXT: vmrglh v3, v3, v4 -; CHECK-NEXT: xxlxor v4, v4, v4 -; CHECK-NEXT: vmrglw v3, v3, v4 +; CHECK-NEXT: vmrghh v4, v3, v4 +; CHECK-NEXT: vmrghh v2, v3, v2 +; CHECK-NEXT: vsplth v3, v3, 3 +; CHECK-NEXT: vmrglw v3, v4, v3 ; CHECK-NEXT: lxvx v4, 0, r3 ; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: vperm v2, v2, v3, v4 @@ -446,18 +438,17 @@ define void @test8(i8* nocapture readonly %sums, i32 signext %delta, i32 signext ; CHECK-NEXT: add r6, r3, r4 ; CHECK-NEXT: lxsibzx v2, r3, r4 ; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: mtvsrd v3, r3 ; CHECK-NEXT: li r3, 8 ; CHECK-NEXT: lxsibzx v5, r6, r3 -; CHECK-NEXT: xxswapd v3, vs0 -; CHECK-NEXT: vspltb v4, v3, 15 -; CHECK-NEXT: vspltb v2, v2, 7 -; CHECK-NEXT: vmrglb v2, v3, v2 ; CHECK-NEXT: addis r3, r2, .LCPI4_0@toc@ha ; CHECK-NEXT: addi r3, r3, .LCPI4_0@toc@l +; CHECK-NEXT: vspltb v2, v2, 7 +; CHECK-NEXT: vmrghb v2, v3, v2 +; CHECK-NEXT: vspltb v4, v3, 7 ; CHECK-NEXT: vspltb v5, v5, 7 ; CHECK-NEXT: vmrglh v2, v2, v4 -; CHECK-NEXT: vmrglb v3, v3, v5 +; CHECK-NEXT: 
vmrghb v3, v3, v5 ; CHECK-NEXT: vmrglw v2, v2, v4 ; CHECK-NEXT: vmrglh v3, v3, v4 ; CHECK-NEXT: vmrglw v3, v4, v3 diff --git a/llvm/test/CodeGen/PowerPC/qpx-load-splat.ll b/llvm/test/CodeGen/PowerPC/qpx-load-splat.ll index 099611a7b5e3..50b864980d98 100644 --- a/llvm/test/CodeGen/PowerPC/qpx-load-splat.ll +++ b/llvm/test/CodeGen/PowerPC/qpx-load-splat.ll @@ -53,8 +53,7 @@ define <4 x float> @foof(float* nocapture readonly %a) #0 { ; CHECK-LABEL: foof: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lfiwzx f0, 0, r3 -; CHECK-NEXT: xxswapd vs0, f0 -; CHECK-NEXT: xxspltw v2, vs0, 3 +; CHECK-NEXT: xxspltw v2, vs0, 1 ; CHECK-NEXT: blr entry: %0 = load float, float* %a, align 4 @@ -68,8 +67,7 @@ define <4 x float> @foofx(float* nocapture readonly %a, i64 %idx) #0 { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: sldi r4, r4, 2 ; CHECK-NEXT: lfiwzx f0, r3, r4 -; CHECK-NEXT: xxswapd vs0, f0 -; CHECK-NEXT: xxspltw v2, vs0, 3 +; CHECK-NEXT: xxspltw v2, vs0, 1 ; CHECK-NEXT: blr entry: %p = getelementptr float, float* %a, i64 %idx diff --git a/llvm/test/CodeGen/PowerPC/scalar_vector_test_1.ll b/llvm/test/CodeGen/PowerPC/scalar_vector_test_1.ll index b43e2c8b97af..c12f7f9a9f05 100644 --- a/llvm/test/CodeGen/PowerPC/scalar_vector_test_1.ll +++ b/llvm/test/CodeGen/PowerPC/scalar_vector_test_1.ll @@ -13,8 +13,7 @@ define <2 x i64> @s2v_test1(i64* nocapture readonly %int64, <2 x i64> %vec) { ; P9LE-LABEL: s2v_test1: ; P9LE: # %bb.0: # %entry ; P9LE-NEXT: lfd f0, 0(r3) -; P9LE-NEXT: xxswapd v3, f0 -; P9LE-NEXT: xxpermdi v2, v2, v3, 1 +; P9LE-NEXT: xxmrghd v2, v2, vs0 ; P9LE-NEXT: blr ; P9BE-LABEL: s2v_test1: @@ -33,8 +32,7 @@ define <2 x i64> @s2v_test2(i64* nocapture readonly %int64, <2 x i64> %vec) { ; P9LE-LABEL: s2v_test2: ; P9LE: # %bb.0: # %entry ; P9LE-NEXT: lfd f0, 8(r3) -; P9LE-NEXT: xxswapd v3, f0 -; P9LE-NEXT: xxpermdi v2, v2, v3, 1 +; P9LE-NEXT: xxmrghd v2, v2, vs0 ; P9LE-NEXT: blr ; P9BE-LABEL: s2v_test2: @@ -55,8 +53,7 @@ define <2 x i64> @s2v_test3(i64* nocapture readonly %int64, 
<2 x i64> %vec, i32 ; P9LE: # %bb.0: # %entry ; P9LE-NEXT: sldi r4, r7, 3 ; P9LE-NEXT: lfdx f0, r3, r4 -; P9LE-NEXT: xxswapd v3, f0 -; P9LE-NEXT: xxpermdi v2, v2, v3, 1 +; P9LE-NEXT: xxmrghd v2, v2, vs0 ; P9LE-NEXT: blr ; P9BE-LABEL: s2v_test3 @@ -78,8 +75,7 @@ define <2 x i64> @s2v_test4(i64* nocapture readonly %int64, <2 x i64> %vec) { ; P9LE-LABEL: s2v_test4: ; P9LE: # %bb.0: # %entry ; P9LE-NEXT: lfd f0, 8(r3) -; P9LE-NEXT: xxswapd v3, f0 -; P9LE-NEXT: xxpermdi v2, v2, v3, 1 +; P9LE-NEXT: xxmrghd v2, v2, vs0 ; P9LE-NEXT: blr ; P9BE-LABEL: s2v_test4: @@ -99,8 +95,7 @@ define <2 x i64> @s2v_test5(<2 x i64> %vec, i64* nocapture readonly %ptr1) { ; P9LE-LABEL: s2v_test5: ; P9LE: # %bb.0: # %entry ; P9LE-NEXT: lfd f0, 0(r5) -; P9LE-NEXT: xxswapd v3, f0 -; P9LE-NEXT: xxpermdi v2, v2, v3, 1 +; P9LE-NEXT: xxmrghd v2, v2, vs0 ; P9LE-NEXT: blr ; P9BE-LABEL: s2v_test5: @@ -119,8 +114,7 @@ define <2 x double> @s2v_test_f1(double* nocapture readonly %f64, <2 x double> % ; P9LE-LABEL: s2v_test_f1: ; P9LE: # %bb.0: # %entry ; P9LE-NEXT: lfd f0, 0(r3) -; P9LE-NEXT: xxswapd vs0, f0 -; P9LE-NEXT: xxpermdi v2, v2, vs0, 1 +; P9LE-NEXT: xxmrghd v2, v2, vs0 ; P9LE-NEXT: blr ; P9BE-LABEL: s2v_test_f1: @@ -132,8 +126,7 @@ define <2 x double> @s2v_test_f1(double* nocapture readonly %f64, <2 x double> % ; P8LE-LABEL: s2v_test_f1: ; P8LE: # %bb.0: # %entry ; P8LE-NEXT: lfdx f0, 0, r3 -; P8LE-NEXT: xxspltd vs0, vs0, 0 -; P8LE-NEXT: xxpermdi v2, v2, vs0, 1 +; P8LE-NEXT: xxmrghd v2, v2, vs0 ; P8LE-NEXT: blr ; P8BE-LABEL: s2v_test_f1: @@ -152,8 +145,7 @@ define <2 x double> @s2v_test_f2(double* nocapture readonly %f64, <2 x double> % ; P9LE-LABEL: s2v_test_f2: ; P9LE: # %bb.0: # %entry ; P9LE-NEXT: lfd f0, 8(r3) -; P9LE-NEXT: xxswapd vs0, f0 -; P9LE-NEXT: xxpermdi v2, v2, vs0, 1 +; P9LE-NEXT: xxmrghd v2, v2, vs0 ; P9LE-NEXT: blr ; P9BE-LABEL: s2v_test_f2: @@ -165,8 +157,7 @@ define <2 x double> @s2v_test_f2(double* nocapture readonly %f64, <2 x double> % ; P8LE-LABEL: s2v_test_f2: ; P8LE: # 
%bb.0: # %entry ; P8LE-NEXT: lfd f0, 8(r3) -; P8LE-NEXT: xxspltd vs0, vs0, 0 -; P8LE-NEXT: xxpermdi v2, v2, vs0, 1 +; P8LE-NEXT: xxmrghd v2, v2, vs0 ; P8LE-NEXT: blr ; P8BE-LABEL: s2v_test_f2: @@ -187,8 +178,7 @@ define <2 x double> @s2v_test_f3(double* nocapture readonly %f64, <2 x double> % ; P9LE: # %bb.0: # %entry ; P9LE-NEXT: sldi r4, r7, 3 ; P9LE-NEXT: lfdx f0, r3, r4 -; P9LE-NEXT: xxswapd vs0, f0 -; P9LE-NEXT: xxpermdi v2, v2, vs0, 1 +; P9LE-NEXT: xxmrghd v2, v2, vs0 ; P9LE-NEXT: blr ; P9BE-LABEL: s2v_test_f3: @@ -202,8 +192,7 @@ define <2 x double> @s2v_test_f3(double* nocapture readonly %f64, <2 x double> % ; P8LE: # %bb.0: # %entry ; P8LE-NEXT: sldi r4, r7, 3 ; P8LE-NEXT: lfdx f0, r3, r4 -; P8LE-NEXT: xxspltd vs0, vs0, 0 -; P8LE-NEXT: xxpermdi v2, v2, vs0, 1 +; P8LE-NEXT: xxmrghd v2, v2, vs0 ; P8LE-NEXT: blr ; P8BE-LABEL: s2v_test_f3: @@ -225,8 +214,7 @@ define <2 x double> @s2v_test_f4(double* nocapture readonly %f64, <2 x double> % ; P9LE-LABEL: s2v_test_f4: ; P9LE: # %bb.0: # %entry ; P9LE-NEXT: lfd f0, 8(r3) -; P9LE-NEXT: xxswapd vs0, f0 -; P9LE-NEXT: xxpermdi v2, v2, vs0, 1 +; P9LE-NEXT: xxmrghd v2, v2, vs0 ; P9LE-NEXT: blr ; P9BE-LABEL: s2v_test_f4: @@ -238,8 +226,7 @@ define <2 x double> @s2v_test_f4(double* nocapture readonly %f64, <2 x double> % ; P8LE-LABEL: s2v_test_f4: ; P8LE: # %bb.0: # %entry ; P8LE-NEXT: lfd f0, 8(r3) -; P8LE-NEXT: xxspltd vs0, vs0, 0 -; P8LE-NEXT: xxpermdi v2, v2, vs0, 1 +; P8LE-NEXT: xxmrghd v2, v2, vs0 ; P8LE-NEXT: blr ; P8BE-LABEL: s2v_test_f4: @@ -259,8 +246,7 @@ define <2 x double> @s2v_test_f5(<2 x double> %vec, double* nocapture readonly % ; P9LE-LABEL: s2v_test_f5: ; P9LE: # %bb.0: # %entry ; P9LE-NEXT: lfd f0, 0(r5) -; P9LE-NEXT: xxswapd vs0, f0 -; P9LE-NEXT: xxpermdi v2, v2, vs0, 1 +; P9LE-NEXT: xxmrghd v2, v2, vs0 ; P9LE-NEXT: blr ; P9BE-LABEL: s2v_test_f5: @@ -272,8 +258,7 @@ define <2 x double> @s2v_test_f5(<2 x double> %vec, double* nocapture readonly % ; P8LE-LABEL: s2v_test_f5: ; P8LE: # %bb.0: # %entry ; 
P8LE-NEXT: lfdx f0, 0, r5 -; P8LE-NEXT: xxspltd vs0, vs0, 0 -; P8LE-NEXT: xxpermdi v2, v2, vs0, 1 +; P8LE-NEXT: xxmrghd v2, v2, vs0 ; P8LE-NEXT: blr ; P8BE-LABEL: s2v_test_f5: diff --git a/llvm/test/CodeGen/PowerPC/scalar_vector_test_3.ll b/llvm/test/CodeGen/PowerPC/scalar_vector_test_3.ll index 83691b52575d..f4572c359942 100644 --- a/llvm/test/CodeGen/PowerPC/scalar_vector_test_3.ll +++ b/llvm/test/CodeGen/PowerPC/scalar_vector_test_3.ll @@ -12,8 +12,7 @@ define <2 x i64> @s2v_test1(i32* nocapture readonly %int32, <2 x i64> %vec) { ; P9LE-LABEL: s2v_test1: ; P9LE: # %bb.0: # %entry ; P9LE-NEXT: lfiwax f0, 0, r3 -; P9LE-NEXT: xxswapd v3, f0 -; P9LE-NEXT: xxpermdi v2, v2, v3, 1 +; P9LE-NEXT: xxmrghd v2, v2, vs0 ; P9LE-NEXT: blr ; P9BE-LABEL: s2v_test1: @@ -25,8 +24,7 @@ define <2 x i64> @s2v_test1(i32* nocapture readonly %int32, <2 x i64> %vec) { ; P8LE-LABEL: s2v_test1: ; P8LE: # %bb.0: # %entry ; P8LE-NEXT: lfiwax f0, 0, r3 -; P8LE-NEXT: xxswapd v3, f0 -; P8LE-NEXT: xxpermdi v2, v2, v3, 1 +; P8LE-NEXT: xxmrghd v2, v2, vs0 ; P8LE-NEXT: blr ; P8BE-LABEL: s2v_test1: @@ -47,8 +45,7 @@ define <2 x i64> @s2v_test2(i32* nocapture readonly %int32, <2 x i64> %vec) { ; P9LE: # %bb.0: # %entry ; P9LE-NEXT: addi r3, r3, 4 ; P9LE-NEXT: lfiwax f0, 0, r3 -; P9LE-NEXT: xxswapd v3, f0 -; P9LE-NEXT: xxpermdi v2, v2, v3, 1 +; P9LE-NEXT: xxmrghd v2, v2, vs0 ; P9LE-NEXT: blr ; P9BE-LABEL: s2v_test2: @@ -62,8 +59,7 @@ define <2 x i64> @s2v_test2(i32* nocapture readonly %int32, <2 x i64> %vec) { ; P8LE: # %bb.0: # %entry ; P8LE-NEXT: addi r3, r3, 4 ; P8LE-NEXT: lfiwax f0, 0, r3 -; P8LE-NEXT: xxswapd v3, f0 -; P8LE-NEXT: xxpermdi v2, v2, v3, 1 +; P8LE-NEXT: xxmrghd v2, v2, vs0 ; P8LE-NEXT: blr ; P8BE-LABEL: s2v_test2: @@ -86,8 +82,7 @@ define <2 x i64> @s2v_test3(i32* nocapture readonly %int32, <2 x i64> %vec, i32 ; P9LE: # %bb.0: # %entry ; P9LE-NEXT: sldi r4, r7, 2 ; P9LE-NEXT: lfiwax f0, r3, r4 -; P9LE-NEXT: xxswapd v3, f0 -; P9LE-NEXT: xxpermdi v2, v2, v3, 1 +; P9LE-NEXT: xxmrghd v2, 
v2, vs0 ; P9LE-NEXT: blr ; P9BE-LABEL: s2v_test3: @@ -101,8 +96,7 @@ define <2 x i64> @s2v_test3(i32* nocapture readonly %int32, <2 x i64> %vec, i32 ; P8LE: # %bb.0: # %entry ; P8LE-NEXT: sldi r4, r7, 2 ; P8LE-NEXT: lfiwax f0, r3, r4 -; P8LE-NEXT: xxswapd v3, f0 -; P8LE-NEXT: xxpermdi v2, v2, v3, 1 +; P8LE-NEXT: xxmrghd v2, v2, vs0 ; P8LE-NEXT: blr ; P8BE-LABEL: s2v_test3: @@ -126,8 +120,7 @@ define <2 x i64> @s2v_test4(i32* nocapture readonly %int32, <2 x i64> %vec) { ; P9LE: # %bb.0: # %entry ; P9LE-NEXT: addi r3, r3, 4 ; P9LE-NEXT: lfiwax f0, 0, r3 -; P9LE-NEXT: xxswapd v3, f0 -; P9LE-NEXT: xxpermdi v2, v2, v3, 1 +; P9LE-NEXT: xxmrghd v2, v2, vs0 ; P9LE-NEXT: blr ; P9BE-LABEL: s2v_test4: @@ -141,8 +134,7 @@ define <2 x i64> @s2v_test4(i32* nocapture readonly %int32, <2 x i64> %vec) { ; P8LE: # %bb.0: # %entry ; P8LE-NEXT: addi r3, r3, 4 ; P8LE-NEXT: lfiwax f0, 0, r3 -; P8LE-NEXT: xxswapd v3, f0 -; P8LE-NEXT: xxpermdi v2, v2, v3, 1 +; P8LE-NEXT: xxmrghd v2, v2, vs0 ; P8LE-NEXT: blr ; P8BE-LABEL: s2v_test4: @@ -164,8 +156,7 @@ define <2 x i64> @s2v_test5(<2 x i64> %vec, i32* nocapture readonly %ptr1) { ; P9LE-LABEL: s2v_test5: ; P9LE: # %bb.0: # %entry ; P9LE-NEXT: lfiwax f0, 0, r5 -; P9LE-NEXT: xxswapd v3, f0 -; P9LE-NEXT: xxpermdi v2, v2, v3, 1 +; P9LE-NEXT: xxmrghd v2, v2, vs0 ; P9LE-NEXT: blr ; P9BE-LABEL: s2v_test5: @@ -177,8 +168,7 @@ define <2 x i64> @s2v_test5(<2 x i64> %vec, i32* nocapture readonly %ptr1) { ; P8LE-LABEL: s2v_test5: ; P8LE: # %bb.0: # %entry ; P8LE-NEXT: lfiwax f0, 0, r5 -; P8LE-NEXT: xxswapd v3, f0 -; P8LE-NEXT: xxpermdi v2, v2, v3, 1 +; P8LE-NEXT: xxmrghd v2, v2, vs0 ; P8LE-NEXT: blr ; P8BE-LABEL: s2v_test5: @@ -198,8 +188,7 @@ define <2 x i64> @s2v_test6(i32* nocapture readonly %ptr) { ; P9LE-LABEL: s2v_test6: ; P9LE: # %bb.0: # %entry ; P9LE-NEXT: lfiwax f0, 0, r3 -; P9LE-NEXT: xxswapd v2, f0 -; P9LE-NEXT: xxspltd v2, v2, 1 +; P9LE-NEXT: xxspltd v2, vs0, 0 ; P9LE-NEXT: blr ; P9BE-LABEL: s2v_test6: @@ -211,8 +200,7 @@ define <2 x i64> 
@s2v_test6(i32* nocapture readonly %ptr) { ; P8LE-LABEL: s2v_test6: ; P8LE: # %bb.0: # %entry ; P8LE-NEXT: lfiwax f0, 0, r3 -; P8LE-NEXT: xxswapd v2, f0 -; P8LE-NEXT: xxspltd v2, v2, 1 +; P8LE-NEXT: xxspltd v2, vs0, 0 ; P8LE-NEXT: blr ; P8BE-LABEL: s2v_test6: @@ -233,8 +221,7 @@ define <2 x i64> @s2v_test7(i32* nocapture readonly %ptr) { ; P9LE-LABEL: s2v_test7: ; P9LE: # %bb.0: # %entry ; P9LE-NEXT: lfiwax f0, 0, r3 -; P9LE-NEXT: xxswapd v2, f0 -; P9LE-NEXT: xxspltd v2, v2, 1 +; P9LE-NEXT: xxspltd v2, vs0, 0 ; P9LE-NEXT: blr ; P9BE-LABEL: s2v_test7: @@ -246,8 +233,7 @@ define <2 x i64> @s2v_test7(i32* nocapture readonly %ptr) { ; P8LE-LABEL: s2v_test7: ; P8LE: # %bb.0: # %entry ; P8LE-NEXT: lfiwax f0, 0, r3 -; P8LE-NEXT: xxswapd v2, f0 -; P8LE-NEXT: xxspltd v2, v2, 1 +; P8LE-NEXT: xxspltd v2, vs0, 0 ; P8LE-NEXT: blr ; P8BE-LABEL: s2v_test7: diff --git a/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll b/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll index 2261d75c6619..3dc34533420c 100644 --- a/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll +++ b/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll @@ -11,12 +11,11 @@ define <4 x i32> @s2v_test1(i32* nocapture readonly %int32, <4 x i32> %vec) { ; P8LE-LABEL: s2v_test1: ; P8LE: # %bb.0: # %entry -; P8LE-NEXT: lfiwzx f0, 0, r3 ; P8LE-NEXT: addis r4, r2, .LCPI0_0@toc@ha -; P8LE-NEXT: addi r3, r4, .LCPI0_0@toc@l -; P8LE-NEXT: lvx v3, 0, r3 -; P8LE-NEXT: xxswapd v4, f0 -; P8LE-NEXT: vperm v2, v4, v2, v3 +; P8LE-NEXT: lxsiwzx v4, 0, r3 +; P8LE-NEXT: addi r4, r4, .LCPI0_0@toc@l +; P8LE-NEXT: lvx v3, 0, r4 +; P8LE-NEXT: vperm v2, v2, v4, v3 ; P8LE-NEXT: blr ; P8BE-LABEL: s2v_test1: @@ -36,13 +35,12 @@ entry: define <4 x i32> @s2v_test2(i32* nocapture readonly %int32, <4 x i32> %vec) { ; P8LE-LABEL: s2v_test2: ; P8LE: # %bb.0: # %entry -; P8LE-NEXT: addi r3, r3, 4 ; P8LE-NEXT: addis r4, r2, .LCPI1_0@toc@ha -; P8LE-NEXT: lfiwzx f0, 0, r3 -; P8LE-NEXT: addi r3, r4, .LCPI1_0@toc@l -; P8LE-NEXT: lvx v3, 0, r3 -; P8LE-NEXT: 
xxswapd v4, f0 -; P8LE-NEXT: vperm v2, v4, v2, v3 +; P8LE-NEXT: addi r3, r3, 4 +; P8LE-NEXT: addi r4, r4, .LCPI1_0@toc@l +; P8LE-NEXT: lxsiwzx v4, 0, r3 +; P8LE-NEXT: lvx v3, 0, r4 +; P8LE-NEXT: vperm v2, v2, v4, v3 ; P8LE-NEXT: blr ; P8BE-LABEL: s2v_test2: @@ -64,13 +62,12 @@ entry: define <4 x i32> @s2v_test3(i32* nocapture readonly %int32, <4 x i32> %vec, i32 signext %Idx) { ; P8LE-LABEL: s2v_test3: ; P8LE: # %bb.0: # %entry -; P8LE-NEXT: sldi r5, r7, 2 ; P8LE-NEXT: addis r4, r2, .LCPI2_0@toc@ha -; P8LE-NEXT: lfiwzx f0, r3, r5 -; P8LE-NEXT: addi r3, r4, .LCPI2_0@toc@l -; P8LE-NEXT: lvx v4, 0, r3 -; P8LE-NEXT: xxswapd v3, f0 -; P8LE-NEXT: vperm v2, v3, v2, v4 +; P8LE-NEXT: sldi r5, r7, 2 +; P8LE-NEXT: addi r4, r4, .LCPI2_0@toc@l +; P8LE-NEXT: lxsiwzx v3, r3, r5 +; P8LE-NEXT: lvx v4, 0, r4 +; P8LE-NEXT: vperm v2, v2, v3, v4 ; P8LE-NEXT: blr ; P8BE-LABEL: s2v_test3: @@ -93,13 +90,12 @@ entry: define <4 x i32> @s2v_test4(i32* nocapture readonly %int32, <4 x i32> %vec) { ; P8LE-LABEL: s2v_test4: ; P8LE: # %bb.0: # %entry -; P8LE-NEXT: addi r3, r3, 4 ; P8LE-NEXT: addis r4, r2, .LCPI3_0@toc@ha -; P8LE-NEXT: lfiwzx f0, 0, r3 -; P8LE-NEXT: addi r3, r4, .LCPI3_0@toc@l -; P8LE-NEXT: lvx v3, 0, r3 -; P8LE-NEXT: xxswapd v4, f0 -; P8LE-NEXT: vperm v2, v4, v2, v3 +; P8LE-NEXT: addi r3, r3, 4 +; P8LE-NEXT: addi r4, r4, .LCPI3_0@toc@l +; P8LE-NEXT: lxsiwzx v4, 0, r3 +; P8LE-NEXT: lvx v3, 0, r4 +; P8LE-NEXT: vperm v2, v2, v4, v3 ; P8LE-NEXT: blr ; P8BE-LABEL: s2v_test4: @@ -121,12 +117,11 @@ entry: define <4 x i32> @s2v_test5(<4 x i32> %vec, i32* nocapture readonly %ptr1) { ; P8LE-LABEL: s2v_test5: ; P8LE: # %bb.0: # %entry -; P8LE-NEXT: lfiwzx f0, 0, r5 ; P8LE-NEXT: addis r3, r2, .LCPI4_0@toc@ha +; P8LE-NEXT: lxsiwzx v4, 0, r5 ; P8LE-NEXT: addi r3, r3, .LCPI4_0@toc@l ; P8LE-NEXT: lvx v3, 0, r3 -; P8LE-NEXT: xxswapd v4, f0 -; P8LE-NEXT: vperm v2, v4, v2, v3 +; P8LE-NEXT: vperm v2, v2, v4, v3 ; P8LE-NEXT: blr ; P8BE-LABEL: s2v_test5: @@ -146,12 +141,11 @@ entry: define <4 x float> 
@s2v_test_f1(float* nocapture readonly %f64, <4 x float> %vec) { ; P8LE-LABEL: s2v_test_f1: ; P8LE: # %bb.0: # %entry -; P8LE-NEXT: lfiwzx f0, 0, r3 ; P8LE-NEXT: addis r4, r2, .LCPI5_0@toc@ha -; P8LE-NEXT: addi r3, r4, .LCPI5_0@toc@l -; P8LE-NEXT: lvx v3, 0, r3 -; P8LE-NEXT: xxswapd v4, f0 -; P8LE-NEXT: vperm v2, v4, v2, v3 +; P8LE-NEXT: lxsiwzx v4, 0, r3 +; P8LE-NEXT: addi r4, r4, .LCPI5_0@toc@l +; P8LE-NEXT: lvx v3, 0, r4 +; P8LE-NEXT: vperm v2, v2, v4, v3 ; P8LE-NEXT: blr ; P8BE-LABEL: s2v_test_f1: @@ -172,10 +166,9 @@ define <2 x float> @s2v_test_f2(float* nocapture readonly %f64, <2 x float> %vec ; P9LE-LABEL: s2v_test_f2: ; P9LE: # %bb.0: # %entry ; P9LE-NEXT: addi r3, r3, 4 -; P9LE-DAG: xxspltw v2, v2, 2 -; P9LE-DAG: lfiwzx f0, 0, r3 -; P9LE-NEXT: xxswapd v3, f0 -; P9LE-NEXT: vmrglw v2, v2, v3 +; P9LE-NEXT: lxsiwzx v3, 0, r3 +; P9LE-NEXT: vmrglw v2, v2, v2 +; P9LE-NEXT: vmrghw v2, v2, v3 ; P9LE-NEXT: blr ; P9BE-LABEL: s2v_test_f2: @@ -189,11 +182,10 @@ define <2 x float> @s2v_test_f2(float* nocapture readonly %f64, <2 x float> %vec ; P8LE-LABEL: s2v_test_f2: ; P8LE: # %bb.0: # %entry +; P8LE-NEXT: vmrglw v2, v2, v2 ; P8LE-NEXT: addi r3, r3, 4 -; P8LE-NEXT: xxspltw v2, v2, 2 -; P8LE-NEXT: lfiwzx f0, 0, r3 -; P8LE-NEXT: xxswapd v3, f0 -; P8LE-NEXT: vmrglw v2, v2, v3 +; P8LE-NEXT: lxsiwzx v3, 0, r3 +; P8LE-NEXT: vmrghw v2, v2, v3 ; P8LE-NEXT: blr ; P8BE-LABEL: s2v_test_f2: @@ -216,10 +208,9 @@ define <2 x float> @s2v_test_f3(float* nocapture readonly %f64, <2 x float> %vec ; P9LE-LABEL: s2v_test_f3: ; P9LE: # %bb.0: # %entry ; P9LE-NEXT: sldi r4, r7, 2 -; P9LE-NEXT: lfiwzx f0, r3, r4 -; P9LE-DAG: xxspltw v2, v2, 2 -; P9LE-DAG: xxswapd v3, f0 -; P9LE-NEXT: vmrglw v2, v2, v3 +; P9LE-NEXT: lxsiwzx v3, r3, r4 +; P9LE-NEXT: vmrglw v2, v2, v2 +; P9LE-NEXT: vmrghw v2, v2, v3 ; P9LE-NEXT: blr ; P9BE-LABEL: s2v_test_f3: @@ -233,11 +224,10 @@ define <2 x float> @s2v_test_f3(float* nocapture readonly %f64, <2 x float> %vec ; P8LE-LABEL: s2v_test_f3: ; P8LE: # %bb.0: # 
%entry +; P8LE-NEXT: vmrglw v2, v2, v2 ; P8LE-NEXT: sldi r4, r7, 2 -; P8LE-NEXT: xxspltw v2, v2, 2 -; P8LE-NEXT: lfiwzx f0, r3, r4 -; P8LE-NEXT: xxswapd v3, f0 -; P8LE-NEXT: vmrglw v2, v2, v3 +; P8LE-NEXT: lxsiwzx v3, r3, r4 +; P8LE-NEXT: vmrghw v2, v2, v3 ; P8LE-NEXT: blr ; P8BE-LABEL: s2v_test_f3: @@ -261,10 +251,9 @@ define <2 x float> @s2v_test_f4(float* nocapture readonly %f64, <2 x float> %vec ; P9LE-LABEL: s2v_test_f4: ; P9LE: # %bb.0: # %entry ; P9LE-NEXT: addi r3, r3, 4 -; P9LE-NEXT: lfiwzx f0, 0, r3 -; P9LE-DAG: xxspltw v2, v2, 2 -; P9LE-DAG: xxswapd v3, f0 -; P9LE-NEXT: vmrglw v2, v2, v3 +; P9LE-NEXT: lxsiwzx v3, 0, r3 +; P9LE-NEXT: vmrglw v2, v2, v2 +; P9LE-NEXT: vmrghw v2, v2, v3 ; P9LE-NEXT: blr ; P9BE-LABEL: s2v_test_f4: @@ -278,11 +267,10 @@ define <2 x float> @s2v_test_f4(float* nocapture readonly %f64, <2 x float> %vec ; P8LE-LABEL: s2v_test_f4: ; P8LE: # %bb.0: # %entry +; P8LE-NEXT: vmrglw v2, v2, v2 ; P8LE-NEXT: addi r3, r3, 4 -; P8LE-NEXT: xxspltw v2, v2, 2 -; P8LE-NEXT: lfiwzx f0, 0, r3 -; P8LE-NEXT: xxswapd v3, f0 -; P8LE-NEXT: vmrglw v2, v2, v3 +; P8LE-NEXT: lxsiwzx v3, 0, r3 +; P8LE-NEXT: vmrghw v2, v2, v3 ; P8LE-NEXT: blr ; P8BE-LABEL: s2v_test_f4: @@ -304,10 +292,9 @@ entry: define <2 x float> @s2v_test_f5(<2 x float> %vec, float* nocapture readonly %ptr1) { ; P9LE-LABEL: s2v_test_f5: ; P9LE: # %bb.0: # %entry -; P9LE-NEXT: lfiwzx f0, 0, r5 -; P9LE-NEXT: xxspltw v2, v2, 2 -; P9LE-NEXT: xxswapd v3, f0 -; P9LE-NEXT: vmrglw v2, v2, v3 +; P9LE-NEXT: lxsiwzx v3, 0, r5 +; P9LE-NEXT: vmrglw v2, v2, v2 +; P9LE-NEXT: vmrghw v2, v2, v3 ; P9LE-NEXT: blr ; P9BE-LABEL: s2v_test_f5: @@ -320,10 +307,9 @@ define <2 x float> @s2v_test_f5(<2 x float> %vec, float* nocapture readonly %ptr ; P8LE-LABEL: s2v_test_f5: ; P8LE: # %bb.0: # %entry -; P8LE-NEXT: lfiwzx f0, 0, r5 -; P8LE-NEXT: xxspltw v2, v2, 2 -; P8LE-NEXT: xxswapd v3, f0 -; P8LE-NEXT: vmrglw v2, v2, v3 +; P8LE-NEXT: vmrglw v2, v2, v2 +; P8LE-NEXT: lxsiwzx v3, 0, r5 +; P8LE-NEXT: vmrghw v2, v2, v3 
; P8LE-NEXT: blr ; P8BE-LABEL: s2v_test_f5: diff --git a/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll b/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll index 935630745f47..097ba07a5b1e 100644 --- a/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll +++ b/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll @@ -13,60 +13,56 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) { ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 0 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: lis r5, -21386 -; P9LE-NEXT: ori r5, r5, 37253 -; P9LE-NEXT: extsh r4, r3 -; P9LE-NEXT: mulhw r5, r4, r5 -; P9LE-NEXT: add r4, r5, r4 +; P9LE-NEXT: lis r4, -21386 +; P9LE-NEXT: ori r4, r4, 37253 +; P9LE-NEXT: extsh r3, r3 +; P9LE-NEXT: mulhw r4, r3, r4 +; P9LE-NEXT: add r4, r4, r3 ; P9LE-NEXT: srwi r5, r4, 31 ; P9LE-NEXT: srawi r4, r4, 6 ; P9LE-NEXT: add r4, r4, r5 -; P9LE-NEXT: lis r5, 31710 ; P9LE-NEXT: mulli r4, r4, 95 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtfprd f0, r3 +; P9LE-NEXT: lis r4, 31710 +; P9LE-NEXT: mtvsrd v3, r3 ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r4, r3 -; P9LE-NEXT: ori r5, r5, 63421 -; P9LE-NEXT: mulhw r5, r4, r5 -; P9LE-NEXT: sub r4, r5, r4 +; P9LE-NEXT: extsh r3, r3 +; P9LE-NEXT: ori r4, r4, 63421 +; P9LE-NEXT: mulhw r4, r3, r4 +; P9LE-NEXT: sub r4, r4, r3 ; P9LE-NEXT: srwi r5, r4, 31 ; P9LE-NEXT: srawi r4, r4, 6 ; P9LE-NEXT: add r4, r4, r5 -; P9LE-NEXT: lis r5, 21399 ; P9LE-NEXT: mulli r4, r4, -124 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: xxswapd v3, vs0 -; P9LE-NEXT: mtfprd f0, r3 +; P9LE-NEXT: lis r4, 21399 +; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r4, r3 -; P9LE-NEXT: ori r5, r5, 33437 -; P9LE-NEXT: mulhw r4, r4, r5 +; P9LE-NEXT: extsh r3, r3 +; P9LE-NEXT: ori r4, r4, 33437 +; P9LE-NEXT: mulhw r4, r3, r4 ; P9LE-NEXT: srwi r5, r4, 31 ; P9LE-NEXT: srawi r4, r4, 5 ; P9LE-NEXT: add r4, r4, r5 -; P9LE-NEXT: lis r5, -16728 ; P9LE-NEXT: mulli r4, r4, 98 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: xxswapd v4, 
vs0 -; P9LE-NEXT: mtfprd f0, r3 +; P9LE-NEXT: vmrghh v3, v4, v3 +; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r4, r3 -; P9LE-NEXT: ori r5, r5, 63249 -; P9LE-NEXT: mulhw r4, r4, r5 +; P9LE-NEXT: lis r4, -16728 +; P9LE-NEXT: ori r4, r4, 63249 +; P9LE-NEXT: extsh r3, r3 +; P9LE-NEXT: mulhw r4, r3, r4 ; P9LE-NEXT: srwi r5, r4, 31 ; P9LE-NEXT: srawi r4, r4, 8 ; P9LE-NEXT: add r4, r4, r5 ; P9LE-NEXT: mulli r4, r4, -1003 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: vmrglh v3, v4, v3 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtfprd f0, r3 -; P9LE-NEXT: xxswapd v2, vs0 -; P9LE-NEXT: vmrglh v2, v2, v4 +; P9LE-NEXT: mtvsrd v2, r3 +; P9LE-NEXT: vmrghh v2, v2, v4 ; P9LE-NEXT: vmrglw v2, v2, v3 ; P9LE-NEXT: blr ; @@ -135,58 +131,54 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) { ; P8LE: # %bb.0: ; P8LE-NEXT: xxswapd vs0, v2 ; P8LE-NEXT: lis r3, 21399 -; P8LE-NEXT: lis r9, -21386 -; P8LE-NEXT: lis r11, 31710 ; P8LE-NEXT: lis r8, -16728 +; P8LE-NEXT: lis r9, -21386 +; P8LE-NEXT: lis r10, 31710 ; P8LE-NEXT: ori r3, r3, 33437 -; P8LE-NEXT: ori r9, r9, 37253 ; P8LE-NEXT: ori r8, r8, 63249 +; P8LE-NEXT: ori r9, r9, 37253 +; P8LE-NEXT: ori r10, r10, 63421 ; P8LE-NEXT: mffprd r4, f0 ; P8LE-NEXT: rldicl r5, r4, 32, 48 -; P8LE-NEXT: clrldi r7, r4, 48 ; P8LE-NEXT: rldicl r6, r4, 16, 48 +; P8LE-NEXT: clrldi r7, r4, 48 +; P8LE-NEXT: extsh r5, r5 +; P8LE-NEXT: extsh r6, r6 ; P8LE-NEXT: rldicl r4, r4, 48, 48 -; P8LE-NEXT: extsh r10, r5 -; P8LE-NEXT: extsh r0, r7 -; P8LE-NEXT: mulhw r3, r10, r3 -; P8LE-NEXT: ori r10, r11, 63421 -; P8LE-NEXT: extsh r11, r4 -; P8LE-NEXT: extsh r12, r6 -; P8LE-NEXT: mulhw r9, r0, r9 -; P8LE-NEXT: mulhw r10, r11, r10 -; P8LE-NEXT: mulhw r8, r12, r8 -; P8LE-NEXT: srwi r12, r3, 31 +; P8LE-NEXT: extsh r7, r7 +; P8LE-NEXT: mulhw r3, r5, r3 +; P8LE-NEXT: extsh r4, r4 +; P8LE-NEXT: mulhw r8, r6, r8 +; P8LE-NEXT: mulhw r9, r7, r9 +; P8LE-NEXT: mulhw r10, r4, r10 +; P8LE-NEXT: srwi r11, r3, 31 ; P8LE-NEXT: srawi 
r3, r3, 5 -; P8LE-NEXT: add r9, r9, r0 -; P8LE-NEXT: sub r10, r10, r11 -; P8LE-NEXT: add r3, r3, r12 +; P8LE-NEXT: add r3, r3, r11 +; P8LE-NEXT: srwi r11, r8, 31 +; P8LE-NEXT: add r9, r9, r7 +; P8LE-NEXT: srawi r8, r8, 8 +; P8LE-NEXT: sub r10, r10, r4 +; P8LE-NEXT: add r8, r8, r11 ; P8LE-NEXT: srwi r11, r9, 31 ; P8LE-NEXT: srawi r9, r9, 6 -; P8LE-NEXT: srwi r12, r8, 31 -; P8LE-NEXT: srawi r8, r8, 8 +; P8LE-NEXT: mulli r3, r3, 98 ; P8LE-NEXT: add r9, r9, r11 ; P8LE-NEXT: srwi r11, r10, 31 ; P8LE-NEXT: srawi r10, r10, 6 -; P8LE-NEXT: add r8, r8, r12 -; P8LE-NEXT: mulli r3, r3, 98 -; P8LE-NEXT: add r10, r10, r11 ; P8LE-NEXT: mulli r8, r8, -1003 +; P8LE-NEXT: add r10, r10, r11 ; P8LE-NEXT: mulli r9, r9, 95 ; P8LE-NEXT: mulli r10, r10, -124 ; P8LE-NEXT: sub r3, r5, r3 +; P8LE-NEXT: mtvsrd v2, r3 ; P8LE-NEXT: sub r5, r6, r8 -; P8LE-NEXT: mtfprd f0, r3 ; P8LE-NEXT: sub r3, r7, r9 +; P8LE-NEXT: mtvsrd v3, r5 ; P8LE-NEXT: sub r4, r4, r10 -; P8LE-NEXT: mtfprd f1, r5 -; P8LE-NEXT: mtfprd f2, r3 -; P8LE-NEXT: xxswapd v2, vs0 -; P8LE-NEXT: mtfprd f3, r4 -; P8LE-NEXT: xxswapd v3, vs1 -; P8LE-NEXT: xxswapd v4, vs2 -; P8LE-NEXT: xxswapd v5, vs3 -; P8LE-NEXT: vmrglh v2, v3, v2 -; P8LE-NEXT: vmrglh v3, v5, v4 +; P8LE-NEXT: mtvsrd v4, r3 +; P8LE-NEXT: mtvsrd v5, r4 +; P8LE-NEXT: vmrghh v2, v3, v2 +; P8LE-NEXT: vmrghh v3, v5, v4 ; P8LE-NEXT: vmrglw v2, v2, v3 ; P8LE-NEXT: blr ; @@ -256,56 +248,52 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) { ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 0 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: lis r5, -21386 -; P9LE-NEXT: ori r5, r5, 37253 -; P9LE-NEXT: extsh r4, r3 -; P9LE-NEXT: mulhw r6, r4, r5 -; P9LE-NEXT: add r4, r6, r4 -; P9LE-NEXT: srwi r6, r4, 31 -; P9LE-NEXT: srawi r4, r4, 6 -; P9LE-NEXT: add r4, r4, r6 -; P9LE-NEXT: mulli r4, r4, 95 -; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtfprd f0, r3 +; P9LE-NEXT: lis r4, -21386 +; P9LE-NEXT: ori r4, r4, 37253 +; P9LE-NEXT: extsh r3, r3 +; P9LE-NEXT: mulhw r5, r3, r4 +; P9LE-NEXT: add r5, r5, r3 
+; P9LE-NEXT: srwi r6, r5, 31 +; P9LE-NEXT: srawi r5, r5, 6 +; P9LE-NEXT: add r5, r5, r6 +; P9LE-NEXT: mulli r5, r5, 95 +; P9LE-NEXT: sub r3, r3, r5 +; P9LE-NEXT: mtvsrd v3, r3 ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r4, r3 -; P9LE-NEXT: mulhw r6, r4, r5 -; P9LE-NEXT: add r4, r6, r4 -; P9LE-NEXT: srwi r6, r4, 31 -; P9LE-NEXT: srawi r4, r4, 6 -; P9LE-NEXT: add r4, r4, r6 -; P9LE-NEXT: mulli r4, r4, 95 -; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: xxswapd v3, vs0 -; P9LE-NEXT: mtfprd f0, r3 +; P9LE-NEXT: extsh r3, r3 +; P9LE-NEXT: mulhw r5, r3, r4 +; P9LE-NEXT: add r5, r5, r3 +; P9LE-NEXT: srwi r6, r5, 31 +; P9LE-NEXT: srawi r5, r5, 6 +; P9LE-NEXT: add r5, r5, r6 +; P9LE-NEXT: mulli r5, r5, 95 +; P9LE-NEXT: sub r3, r3, r5 +; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r4, r3 -; P9LE-NEXT: mulhw r6, r4, r5 -; P9LE-NEXT: add r4, r6, r4 -; P9LE-NEXT: srwi r6, r4, 31 -; P9LE-NEXT: srawi r4, r4, 6 -; P9LE-NEXT: add r4, r4, r6 -; P9LE-NEXT: mulli r4, r4, 95 -; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtfprd f0, r3 +; P9LE-NEXT: extsh r3, r3 +; P9LE-NEXT: mulhw r5, r3, r4 +; P9LE-NEXT: add r5, r5, r3 +; P9LE-NEXT: srwi r6, r5, 31 +; P9LE-NEXT: srawi r5, r5, 6 +; P9LE-NEXT: add r5, r5, r6 +; P9LE-NEXT: mulli r5, r5, 95 +; P9LE-NEXT: sub r3, r3, r5 +; P9LE-NEXT: vmrghh v3, v4, v3 +; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r4, r3 -; P9LE-NEXT: mulhw r5, r4, r5 -; P9LE-NEXT: add r4, r5, r4 +; P9LE-NEXT: extsh r3, r3 +; P9LE-NEXT: mulhw r4, r3, r4 +; P9LE-NEXT: add r4, r4, r3 ; P9LE-NEXT: srwi r5, r4, 31 ; P9LE-NEXT: srawi r4, r4, 6 ; P9LE-NEXT: add r4, r4, r5 ; P9LE-NEXT: mulli r4, r4, 95 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: vmrglh v3, v4, v3 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtfprd f0, r3 -; P9LE-NEXT: xxswapd v2, vs0 -; P9LE-NEXT: vmrglh v2, v2, v4 +; P9LE-NEXT: mtvsrd v2, r3 +; 
P9LE-NEXT: vmrghh v2, v2, v4 ; P9LE-NEXT: vmrglw v2, v2, v3 ; P9LE-NEXT: blr ; @@ -370,56 +358,50 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) { ; P8LE: # %bb.0: ; P8LE-NEXT: xxswapd vs0, v2 ; P8LE-NEXT: lis r3, -21386 -; P8LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; P8LE-NEXT: ori r3, r3, 37253 ; P8LE-NEXT: mffprd r4, f0 ; P8LE-NEXT: clrldi r5, r4, 48 ; P8LE-NEXT: rldicl r6, r4, 48, 48 -; P8LE-NEXT: extsh r8, r5 +; P8LE-NEXT: extsh r5, r5 ; P8LE-NEXT: rldicl r7, r4, 32, 48 -; P8LE-NEXT: extsh r9, r6 -; P8LE-NEXT: mulhw r10, r8, r3 +; P8LE-NEXT: extsh r6, r6 +; P8LE-NEXT: mulhw r8, r5, r3 ; P8LE-NEXT: rldicl r4, r4, 16, 48 -; P8LE-NEXT: extsh r11, r7 -; P8LE-NEXT: mulhw r12, r9, r3 -; P8LE-NEXT: extsh r0, r4 -; P8LE-NEXT: mulhw r30, r11, r3 -; P8LE-NEXT: mulhw r3, r0, r3 -; P8LE-NEXT: add r8, r10, r8 -; P8LE-NEXT: add r9, r12, r9 -; P8LE-NEXT: srwi r10, r8, 31 +; P8LE-NEXT: extsh r7, r7 +; P8LE-NEXT: mulhw r9, r6, r3 +; P8LE-NEXT: extsh r4, r4 +; P8LE-NEXT: mulhw r10, r7, r3 +; P8LE-NEXT: mulhw r3, r4, r3 +; P8LE-NEXT: add r8, r8, r5 +; P8LE-NEXT: add r9, r9, r6 +; P8LE-NEXT: srwi r11, r8, 31 ; P8LE-NEXT: srawi r8, r8, 6 -; P8LE-NEXT: add r11, r30, r11 -; P8LE-NEXT: add r3, r3, r0 -; P8LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; P8LE-NEXT: add r8, r8, r10 -; P8LE-NEXT: srwi r10, r9, 31 +; P8LE-NEXT: add r10, r10, r7 +; P8LE-NEXT: add r3, r3, r4 +; P8LE-NEXT: add r8, r8, r11 +; P8LE-NEXT: srwi r11, r9, 31 ; P8LE-NEXT: srawi r9, r9, 6 ; P8LE-NEXT: mulli r8, r8, 95 -; P8LE-NEXT: add r9, r9, r10 -; P8LE-NEXT: srwi r10, r11, 31 -; P8LE-NEXT: srawi r11, r11, 6 +; P8LE-NEXT: add r9, r9, r11 +; P8LE-NEXT: srwi r11, r10, 31 +; P8LE-NEXT: srawi r10, r10, 6 ; P8LE-NEXT: mulli r9, r9, 95 -; P8LE-NEXT: add r10, r11, r10 +; P8LE-NEXT: add r10, r10, r11 ; P8LE-NEXT: srwi r11, r3, 31 ; P8LE-NEXT: srawi r3, r3, 6 ; P8LE-NEXT: mulli r10, r10, 95 ; P8LE-NEXT: sub r5, r5, r8 ; P8LE-NEXT: add r3, r3, r11 -; P8LE-NEXT: mtfprd f0, r5 +; P8LE-NEXT: mtvsrd v2, r5 ; 
P8LE-NEXT: mulli r3, r3, 95 ; P8LE-NEXT: sub r6, r6, r9 -; P8LE-NEXT: mtfprd f1, r6 -; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: mtvsrd v3, r6 ; P8LE-NEXT: sub r5, r7, r10 -; P8LE-NEXT: mtfprd f2, r5 -; P8LE-NEXT: xxswapd v3, vs1 +; P8LE-NEXT: mtvsrd v4, r5 ; P8LE-NEXT: sub r3, r4, r3 -; P8LE-NEXT: mtfprd f3, r3 -; P8LE-NEXT: xxswapd v4, vs2 -; P8LE-NEXT: vmrglh v2, v3, v2 -; P8LE-NEXT: xxswapd v5, vs3 -; P8LE-NEXT: vmrglh v3, v5, v4 +; P8LE-NEXT: vmrghh v2, v3, v2 +; P8LE-NEXT: mtvsrd v5, r3 +; P8LE-NEXT: vmrghh v3, v5, v4 ; P8LE-NEXT: vmrglw v2, v3, v2 ; P8LE-NEXT: blr ; @@ -487,67 +469,59 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) { ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 0 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: lis r5, -21386 -; P9LE-NEXT: ori r5, r5, 37253 -; P9LE-NEXT: extsh r4, r3 -; P9LE-NEXT: mulhw r6, r4, r5 -; P9LE-NEXT: add r4, r6, r4 -; P9LE-NEXT: srwi r6, r4, 31 -; P9LE-NEXT: srawi r4, r4, 6 -; P9LE-NEXT: add r4, r4, r6 -; P9LE-NEXT: mulli r6, r4, 95 +; P9LE-NEXT: lis r4, -21386 +; P9LE-NEXT: ori r4, r4, 37253 +; P9LE-NEXT: extsh r3, r3 +; P9LE-NEXT: mulhw r5, r3, r4 +; P9LE-NEXT: add r5, r5, r3 +; P9LE-NEXT: srwi r6, r5, 31 +; P9LE-NEXT: srawi r5, r5, 6 +; P9LE-NEXT: add r5, r5, r6 +; P9LE-NEXT: mulli r6, r5, 95 ; P9LE-NEXT: sub r3, r3, r6 -; P9LE-NEXT: mtfprd f0, r3 +; P9LE-NEXT: mtvsrd v3, r3 ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r6, r3 -; P9LE-NEXT: mulhw r7, r6, r5 +; P9LE-NEXT: mulhw r7, r6, r4 ; P9LE-NEXT: add r6, r7, r6 ; P9LE-NEXT: srwi r7, r6, 31 ; P9LE-NEXT: srawi r6, r6, 6 ; P9LE-NEXT: add r6, r6, r7 ; P9LE-NEXT: mulli r7, r6, 95 ; P9LE-NEXT: sub r3, r3, r7 -; P9LE-NEXT: xxswapd v3, vs0 -; P9LE-NEXT: mtfprd f0, r3 +; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r7, r3 -; P9LE-NEXT: mulhw r8, r7, r5 +; P9LE-NEXT: mulhw r8, r7, r4 ; P9LE-NEXT: add r7, r8, r7 ; P9LE-NEXT: srwi r8, r7, 31 ; P9LE-NEXT: srawi r7, r7, 6 ; P9LE-NEXT: add r7, r7, 
r8 ; P9LE-NEXT: mulli r8, r7, 95 ; P9LE-NEXT: sub r3, r3, r8 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtfprd f0, r3 +; P9LE-NEXT: vmrghh v3, v4, v3 +; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r8, r3 -; P9LE-NEXT: mulhw r5, r8, r5 -; P9LE-NEXT: add r5, r5, r8 -; P9LE-NEXT: srwi r8, r5, 31 -; P9LE-NEXT: srawi r5, r5, 6 -; P9LE-NEXT: add r5, r5, r8 -; P9LE-NEXT: mulli r8, r5, 95 +; P9LE-NEXT: mulhw r4, r8, r4 +; P9LE-NEXT: add r4, r4, r8 +; P9LE-NEXT: srwi r8, r4, 31 +; P9LE-NEXT: srawi r4, r4, 6 +; P9LE-NEXT: add r4, r4, r8 +; P9LE-NEXT: mulli r8, r4, 95 ; P9LE-NEXT: sub r3, r3, r8 -; P9LE-NEXT: vmrglh v3, v4, v3 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtfprd f0, r3 -; P9LE-NEXT: xxswapd v2, vs0 -; P9LE-NEXT: mtfprd f0, r4 -; P9LE-NEXT: vmrglh v2, v2, v4 +; P9LE-NEXT: mtvsrd v2, r3 +; P9LE-NEXT: vmrghh v2, v2, v4 +; P9LE-NEXT: mtvsrd v4, r6 ; P9LE-NEXT: vmrglw v2, v2, v3 -; P9LE-NEXT: xxswapd v3, vs0 -; P9LE-NEXT: mtfprd f0, r6 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtfprd f0, r7 -; P9LE-NEXT: vmrglh v3, v4, v3 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtfprd f0, r5 -; P9LE-NEXT: xxswapd v5, vs0 -; P9LE-NEXT: vmrglh v4, v5, v4 +; P9LE-NEXT: mtvsrd v3, r5 +; P9LE-NEXT: vmrghh v3, v4, v3 +; P9LE-NEXT: mtvsrd v4, r7 +; P9LE-NEXT: mtvsrd v5, r4 +; P9LE-NEXT: vmrghh v4, v5, v4 ; P9LE-NEXT: vmrglw v3, v4, v3 ; P9LE-NEXT: vadduhm v2, v2, v3 ; P9LE-NEXT: blr @@ -624,69 +598,59 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) { ; P8LE-LABEL: combine_srem_sdiv: ; P8LE: # %bb.0: ; P8LE-NEXT: xxswapd vs0, v2 -; P8LE-NEXT: lis r4, -21386 -; P8LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; P8LE-NEXT: ori r4, r4, 37253 -; P8LE-NEXT: mffprd r5, f0 -; P8LE-NEXT: clrldi r3, r5, 48 -; P8LE-NEXT: rldicl r6, r5, 48, 48 -; P8LE-NEXT: rldicl r7, r5, 32, 48 -; P8LE-NEXT: extsh r8, r3 -; P8LE-NEXT: extsh r9, r6 -; P8LE-NEXT: extsh r10, r7 -; P8LE-NEXT: mulhw r11, r8, r4 -; P8LE-NEXT: rldicl r5, r5, 16, 48 -; 
P8LE-NEXT: mulhw r12, r9, r4 -; P8LE-NEXT: mulhw r0, r10, r4 -; P8LE-NEXT: extsh r30, r5 -; P8LE-NEXT: mulhw r4, r30, r4 +; P8LE-NEXT: lis r3, -21386 +; P8LE-NEXT: ori r3, r3, 37253 +; P8LE-NEXT: mffprd r4, f0 +; P8LE-NEXT: clrldi r5, r4, 48 +; P8LE-NEXT: rldicl r6, r4, 48, 48 +; P8LE-NEXT: rldicl r7, r4, 32, 48 +; P8LE-NEXT: extsh r5, r5 +; P8LE-NEXT: extsh r8, r6 +; P8LE-NEXT: extsh r9, r7 +; P8LE-NEXT: mulhw r10, r5, r3 +; P8LE-NEXT: mulhw r11, r8, r3 +; P8LE-NEXT: rldicl r4, r4, 16, 48 +; P8LE-NEXT: mulhw r12, r9, r3 +; P8LE-NEXT: extsh r0, r4 +; P8LE-NEXT: mulhw r3, r0, r3 +; P8LE-NEXT: add r10, r10, r5 ; P8LE-NEXT: add r8, r11, r8 +; P8LE-NEXT: srwi r11, r10, 31 ; P8LE-NEXT: add r9, r12, r9 -; P8LE-NEXT: srwi r11, r8, 31 -; P8LE-NEXT: add r10, r0, r10 -; P8LE-NEXT: srawi r8, r8, 6 -; P8LE-NEXT: srawi r12, r9, 6 +; P8LE-NEXT: srawi r10, r10, 6 +; P8LE-NEXT: srawi r12, r8, 6 +; P8LE-NEXT: srwi r8, r8, 31 +; P8LE-NEXT: add r10, r10, r11 +; P8LE-NEXT: add r3, r3, r0 +; P8LE-NEXT: srawi r11, r9, 6 ; P8LE-NEXT: srwi r9, r9, 31 -; P8LE-NEXT: add r8, r8, r11 -; P8LE-NEXT: add r4, r4, r30 -; P8LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; P8LE-NEXT: srawi r11, r10, 6 -; P8LE-NEXT: srwi r10, r10, 31 -; P8LE-NEXT: add r9, r12, r9 -; P8LE-NEXT: mtfprd f0, r8 -; P8LE-NEXT: mulli r12, r8, 95 -; P8LE-NEXT: add r10, r11, r10 -; P8LE-NEXT: srwi r8, r4, 31 -; P8LE-NEXT: mtfprd f1, r9 -; P8LE-NEXT: srawi r4, r4, 6 -; P8LE-NEXT: mulli r11, r9, 95 -; P8LE-NEXT: xxswapd v2, vs0 -; P8LE-NEXT: mtfprd f2, r10 -; P8LE-NEXT: mulli r9, r10, 95 -; P8LE-NEXT: add r4, r4, r8 -; P8LE-NEXT: xxswapd v3, vs1 -; P8LE-NEXT: mtfprd f3, r4 -; P8LE-NEXT: mulli r4, r4, 95 -; P8LE-NEXT: xxswapd v1, vs2 -; P8LE-NEXT: sub r3, r3, r12 -; P8LE-NEXT: mtfprd f0, r3 -; P8LE-NEXT: sub r6, r6, r11 -; P8LE-NEXT: xxswapd v6, vs3 -; P8LE-NEXT: sub r3, r7, r9 -; P8LE-NEXT: mtfprd f1, r6 -; P8LE-NEXT: mtfprd f4, r3 -; P8LE-NEXT: sub r3, r5, r4 -; P8LE-NEXT: mtfprd f5, r3 -; P8LE-NEXT: xxswapd v4, vs1 -; 
P8LE-NEXT: vmrglh v2, v3, v2 -; P8LE-NEXT: xxswapd v3, vs0 -; P8LE-NEXT: xxswapd v5, vs4 -; P8LE-NEXT: xxswapd v0, vs5 -; P8LE-NEXT: vmrglh v3, v4, v3 -; P8LE-NEXT: vmrglh v4, v0, v5 -; P8LE-NEXT: vmrglh v5, v6, v1 -; P8LE-NEXT: vmrglw v3, v4, v3 -; P8LE-NEXT: vmrglw v2, v5, v2 +; P8LE-NEXT: add r8, r12, r8 +; P8LE-NEXT: mtvsrd v2, r10 +; P8LE-NEXT: mulli r12, r10, 95 +; P8LE-NEXT: add r9, r11, r9 +; P8LE-NEXT: srwi r11, r3, 31 +; P8LE-NEXT: mtvsrd v3, r8 +; P8LE-NEXT: srawi r3, r3, 6 +; P8LE-NEXT: mulli r10, r8, 95 +; P8LE-NEXT: mtvsrd v4, r9 +; P8LE-NEXT: add r3, r3, r11 +; P8LE-NEXT: mulli r8, r9, 95 +; P8LE-NEXT: vmrghh v2, v3, v2 +; P8LE-NEXT: mulli r9, r3, 95 +; P8LE-NEXT: sub r5, r5, r12 +; P8LE-NEXT: sub r6, r6, r10 +; P8LE-NEXT: mtvsrd v3, r5 +; P8LE-NEXT: mtvsrd v5, r6 +; P8LE-NEXT: sub r5, r7, r8 +; P8LE-NEXT: sub r4, r4, r9 +; P8LE-NEXT: mtvsrd v0, r5 +; P8LE-NEXT: mtvsrd v1, r4 +; P8LE-NEXT: vmrghh v3, v5, v3 +; P8LE-NEXT: mtvsrd v5, r3 +; P8LE-NEXT: vmrghh v0, v1, v0 +; P8LE-NEXT: vmrghh v4, v5, v4 +; P8LE-NEXT: vmrglw v3, v0, v3 +; P8LE-NEXT: vmrglw v2, v4, v2 ; P8LE-NEXT: vadduhm v2, v3, v2 ; P8LE-NEXT: blr ; @@ -767,47 +731,43 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) { ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 0 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r4, r3 -; P9LE-NEXT: srawi r4, r4, 6 +; P9LE-NEXT: extsh r3, r3 +; P9LE-NEXT: srawi r4, r3, 6 ; P9LE-NEXT: addze r4, r4 ; P9LE-NEXT: slwi r4, r4, 6 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtfprd f0, r3 +; P9LE-NEXT: mtvsrd v3, r3 ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r4, r3 -; P9LE-NEXT: srawi r4, r4, 5 +; P9LE-NEXT: extsh r3, r3 +; P9LE-NEXT: srawi r4, r3, 5 ; P9LE-NEXT: addze r4, r4 ; P9LE-NEXT: slwi r4, r4, 5 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: xxswapd v3, vs0 -; P9LE-NEXT: mtfprd f0, r3 +; P9LE-NEXT: lis r4, -21386 +; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: lis r5, 
-21386 -; P9LE-NEXT: ori r5, r5, 37253 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: extsh r4, r3 -; P9LE-NEXT: mulhw r5, r4, r5 -; P9LE-NEXT: add r4, r5, r4 +; P9LE-NEXT: extsh r3, r3 +; P9LE-NEXT: ori r4, r4, 37253 +; P9LE-NEXT: mulhw r4, r3, r4 +; P9LE-NEXT: add r4, r4, r3 ; P9LE-NEXT: srwi r5, r4, 31 ; P9LE-NEXT: srawi r4, r4, 6 ; P9LE-NEXT: add r4, r4, r5 ; P9LE-NEXT: mulli r4, r4, 95 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtfprd f0, r3 +; P9LE-NEXT: vmrghh v3, v4, v3 +; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r4, r3 -; P9LE-NEXT: srawi r4, r4, 3 +; P9LE-NEXT: extsh r3, r3 +; P9LE-NEXT: srawi r4, r3, 3 ; P9LE-NEXT: addze r4, r4 ; P9LE-NEXT: slwi r4, r4, 3 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: vmrglh v3, v4, v3 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtfprd f0, r3 -; P9LE-NEXT: xxswapd v2, vs0 -; P9LE-NEXT: vmrglh v2, v4, v2 +; P9LE-NEXT: mtvsrd v2, r3 +; P9LE-NEXT: vmrghh v2, v4, v2 ; P9LE-NEXT: vmrglw v2, v2, v3 ; P9LE-NEXT: blr ; @@ -866,42 +826,38 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) { ; P8LE-NEXT: ori r3, r3, 37253 ; P8LE-NEXT: mffprd r4, f0 ; P8LE-NEXT: rldicl r5, r4, 16, 48 -; P8LE-NEXT: clrldi r7, r4, 48 -; P8LE-NEXT: extsh r6, r5 -; P8LE-NEXT: extsh r8, r7 -; P8LE-NEXT: mulhw r3, r6, r3 -; P8LE-NEXT: rldicl r9, r4, 48, 48 -; P8LE-NEXT: srawi r8, r8, 6 -; P8LE-NEXT: extsh r10, r9 +; P8LE-NEXT: clrldi r6, r4, 48 +; P8LE-NEXT: extsh r5, r5 +; P8LE-NEXT: extsh r6, r6 +; P8LE-NEXT: mulhw r3, r5, r3 +; P8LE-NEXT: rldicl r7, r4, 48, 48 +; P8LE-NEXT: srawi r8, r6, 6 +; P8LE-NEXT: extsh r7, r7 ; P8LE-NEXT: addze r8, r8 ; P8LE-NEXT: rldicl r4, r4, 32, 48 -; P8LE-NEXT: srawi r10, r10, 5 +; P8LE-NEXT: srawi r9, r7, 5 +; P8LE-NEXT: extsh r4, r4 ; P8LE-NEXT: slwi r8, r8, 6 -; P8LE-NEXT: add r3, r3, r6 -; P8LE-NEXT: addze r6, r10 -; P8LE-NEXT: sub r7, r7, r8 +; P8LE-NEXT: add r3, r3, r5 +; P8LE-NEXT: addze r9, r9 +; P8LE-NEXT: sub r6, r6, r8 ; P8LE-NEXT: srwi r10, r3, 31 ; 
P8LE-NEXT: srawi r3, r3, 6 -; P8LE-NEXT: mtfprd f0, r7 -; P8LE-NEXT: slwi r6, r6, 5 +; P8LE-NEXT: slwi r8, r9, 5 +; P8LE-NEXT: mtvsrd v2, r6 ; P8LE-NEXT: add r3, r3, r10 -; P8LE-NEXT: extsh r10, r4 -; P8LE-NEXT: sub r6, r9, r6 +; P8LE-NEXT: srawi r9, r4, 3 +; P8LE-NEXT: sub r6, r7, r8 ; P8LE-NEXT: mulli r3, r3, 95 -; P8LE-NEXT: srawi r8, r10, 3 -; P8LE-NEXT: mtfprd f1, r6 -; P8LE-NEXT: xxswapd v2, vs0 -; P8LE-NEXT: addze r7, r8 -; P8LE-NEXT: xxswapd v3, vs1 +; P8LE-NEXT: addze r7, r9 +; P8LE-NEXT: mtvsrd v3, r6 +; P8LE-NEXT: vmrghh v2, v3, v2 ; P8LE-NEXT: sub r3, r5, r3 ; P8LE-NEXT: slwi r5, r7, 3 ; P8LE-NEXT: sub r4, r4, r5 -; P8LE-NEXT: mtfprd f2, r3 -; P8LE-NEXT: mtfprd f3, r4 -; P8LE-NEXT: xxswapd v4, vs2 -; P8LE-NEXT: vmrglh v2, v3, v2 -; P8LE-NEXT: xxswapd v5, vs3 -; P8LE-NEXT: vmrglh v3, v4, v5 +; P8LE-NEXT: mtvsrd v4, r3 +; P8LE-NEXT: mtvsrd v5, r4 +; P8LE-NEXT: vmrghh v3, v4, v5 ; P8LE-NEXT: vmrglw v2, v3, v2 ; P8LE-NEXT: blr ; @@ -959,48 +915,46 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) { ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: lis r5, -14230 -; P9LE-NEXT: ori r5, r5, 30865 -; P9LE-NEXT: extsh r4, r3 -; P9LE-NEXT: mulhw r5, r4, r5 -; P9LE-NEXT: add r4, r5, r4 +; P9LE-NEXT: lis r4, -14230 +; P9LE-NEXT: ori r4, r4, 30865 +; P9LE-NEXT: extsh r3, r3 +; P9LE-NEXT: mulhw r4, r3, r4 +; P9LE-NEXT: add r4, r4, r3 ; P9LE-NEXT: srwi r5, r4, 31 ; P9LE-NEXT: srawi r4, r4, 9 ; P9LE-NEXT: add r4, r4, r5 -; P9LE-NEXT: lis r5, -19946 ; P9LE-NEXT: mulli r4, r4, 654 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtfprd f0, r3 +; P9LE-NEXT: lis r4, -19946 +; P9LE-NEXT: mtvsrd v3, r3 +; P9LE-NEXT: li r3, 0 +; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 4 -; P9LE-NEXT: ori r5, r5, 17097 -; P9LE-NEXT: xxlxor v3, v3, v3 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r4, r3 -; P9LE-NEXT: mulhw r5, r4, r5 -; P9LE-NEXT: add r4, r5, r4 +; P9LE-NEXT: extsh r3, r3 +; P9LE-NEXT: ori r4, r4, 17097 +; P9LE-NEXT: mulhw r4, r3, r4 
+; P9LE-NEXT: add r4, r4, r3 ; P9LE-NEXT: srwi r5, r4, 31 ; P9LE-NEXT: srawi r4, r4, 4 ; P9LE-NEXT: add r4, r4, r5 -; P9LE-NEXT: lis r5, 24749 ; P9LE-NEXT: mulli r4, r4, 23 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtfprd f0, r3 +; P9LE-NEXT: vmrghh v3, v3, v4 +; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r4, r3 -; P9LE-NEXT: ori r5, r5, 47143 -; P9LE-NEXT: mulhw r4, r4, r5 +; P9LE-NEXT: lis r4, 24749 +; P9LE-NEXT: ori r4, r4, 47143 +; P9LE-NEXT: extsh r3, r3 +; P9LE-NEXT: mulhw r4, r3, r4 ; P9LE-NEXT: srwi r5, r4, 31 ; P9LE-NEXT: srawi r4, r4, 11 ; P9LE-NEXT: add r4, r4, r5 ; P9LE-NEXT: mulli r4, r4, 5423 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: vmrglh v3, v4, v3 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtfprd f0, r3 -; P9LE-NEXT: xxswapd v2, vs0 -; P9LE-NEXT: vmrglh v2, v2, v4 +; P9LE-NEXT: mtvsrd v2, r3 +; P9LE-NEXT: vmrghh v2, v2, v4 ; P9LE-NEXT: vmrglw v2, v2, v3 ; P9LE-NEXT: blr ; @@ -1058,49 +1012,47 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) { ; P8LE-LABEL: dont_fold_srem_one: ; P8LE: # %bb.0: ; P8LE-NEXT: xxswapd vs0, v2 -; P8LE-NEXT: lis r3, 24749 -; P8LE-NEXT: lis r7, -19946 -; P8LE-NEXT: lis r9, -14230 -; P8LE-NEXT: xxlxor v5, v5, v5 -; P8LE-NEXT: ori r3, r3, 47143 -; P8LE-NEXT: ori r7, r7, 17097 -; P8LE-NEXT: mffprd r4, f0 -; P8LE-NEXT: rldicl r5, r4, 16, 48 -; P8LE-NEXT: rldicl r6, r4, 32, 48 -; P8LE-NEXT: rldicl r4, r4, 48, 48 -; P8LE-NEXT: extsh r8, r5 -; P8LE-NEXT: extsh r10, r6 -; P8LE-NEXT: mulhw r3, r8, r3 -; P8LE-NEXT: ori r8, r9, 30865 -; P8LE-NEXT: extsh r9, r4 -; P8LE-NEXT: mulhw r7, r10, r7 -; P8LE-NEXT: mulhw r8, r9, r8 -; P8LE-NEXT: add r7, r7, r10 -; P8LE-NEXT: srwi r10, r3, 31 -; P8LE-NEXT: add r8, r8, r9 -; P8LE-NEXT: srawi r3, r3, 11 -; P8LE-NEXT: srwi r9, r7, 31 -; P8LE-NEXT: srawi r7, r7, 4 -; P8LE-NEXT: add r3, r3, r10 -; P8LE-NEXT: add r7, r7, r9 +; P8LE-NEXT: lis r5, 24749 +; P8LE-NEXT: lis r6, -19946 +; P8LE-NEXT: lis r8, -14230 
+; P8LE-NEXT: ori r5, r5, 47143 +; P8LE-NEXT: ori r6, r6, 17097 +; P8LE-NEXT: ori r8, r8, 30865 +; P8LE-NEXT: mffprd r3, f0 +; P8LE-NEXT: rldicl r4, r3, 16, 48 +; P8LE-NEXT: rldicl r7, r3, 32, 48 +; P8LE-NEXT: rldicl r3, r3, 48, 48 +; P8LE-NEXT: extsh r4, r4 +; P8LE-NEXT: extsh r7, r7 +; P8LE-NEXT: extsh r3, r3 +; P8LE-NEXT: mulhw r5, r4, r5 +; P8LE-NEXT: mulhw r6, r7, r6 +; P8LE-NEXT: mulhw r8, r3, r8 +; P8LE-NEXT: srwi r9, r5, 31 +; P8LE-NEXT: srawi r5, r5, 11 +; P8LE-NEXT: add r6, r6, r7 +; P8LE-NEXT: add r8, r8, r3 +; P8LE-NEXT: add r5, r5, r9 +; P8LE-NEXT: srwi r9, r6, 31 +; P8LE-NEXT: srawi r6, r6, 4 +; P8LE-NEXT: add r6, r6, r9 ; P8LE-NEXT: srwi r9, r8, 31 ; P8LE-NEXT: srawi r8, r8, 9 -; P8LE-NEXT: mulli r3, r3, 5423 +; P8LE-NEXT: mulli r5, r5, 5423 ; P8LE-NEXT: add r8, r8, r9 -; P8LE-NEXT: mulli r7, r7, 23 +; P8LE-NEXT: mulli r6, r6, 23 +; P8LE-NEXT: li r9, 0 ; P8LE-NEXT: mulli r8, r8, 654 -; P8LE-NEXT: sub r3, r5, r3 -; P8LE-NEXT: mtfprd f0, r3 -; P8LE-NEXT: sub r3, r6, r7 -; P8LE-NEXT: sub r4, r4, r8 -; P8LE-NEXT: mtfprd f1, r3 -; P8LE-NEXT: mtfprd f2, r4 -; P8LE-NEXT: xxswapd v2, vs0 -; P8LE-NEXT: xxswapd v3, vs1 -; P8LE-NEXT: xxswapd v4, vs2 -; P8LE-NEXT: vmrglh v2, v2, v3 -; P8LE-NEXT: vmrglh v3, v4, v5 -; P8LE-NEXT: vmrglw v2, v2, v3 +; P8LE-NEXT: mtvsrd v2, r9 +; P8LE-NEXT: sub r4, r4, r5 +; P8LE-NEXT: sub r5, r7, r6 +; P8LE-NEXT: mtvsrd v3, r4 +; P8LE-NEXT: sub r3, r3, r8 +; P8LE-NEXT: mtvsrd v4, r5 +; P8LE-NEXT: mtvsrd v5, r3 +; P8LE-NEXT: vmrghh v3, v3, v4 +; P8LE-NEXT: vmrghh v2, v5, v2 +; P8LE-NEXT: vmrglw v2, v3, v2 ; P8LE-NEXT: blr ; ; P8BE-LABEL: dont_fold_srem_one: @@ -1161,43 +1113,41 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) { ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: lis r5, -19946 -; P9LE-NEXT: ori r5, r5, 17097 -; P9LE-NEXT: extsh r4, r3 -; P9LE-NEXT: mulhw r5, r4, r5 -; P9LE-NEXT: add r4, r5, r4 +; P9LE-NEXT: lis r4, -19946 +; P9LE-NEXT: ori r4, r4, 17097 +; P9LE-NEXT: extsh 
r3, r3 +; P9LE-NEXT: mulhw r4, r3, r4 +; P9LE-NEXT: add r4, r4, r3 ; P9LE-NEXT: srwi r5, r4, 31 ; P9LE-NEXT: srawi r4, r4, 4 ; P9LE-NEXT: add r4, r4, r5 -; P9LE-NEXT: lis r5, 24749 ; P9LE-NEXT: mulli r4, r4, 23 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtfprd f0, r3 +; P9LE-NEXT: lis r4, 24749 +; P9LE-NEXT: mtvsrd v3, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r4, r3 -; P9LE-NEXT: ori r5, r5, 47143 -; P9LE-NEXT: mulhw r4, r4, r5 +; P9LE-NEXT: extsh r3, r3 +; P9LE-NEXT: ori r4, r4, 47143 +; P9LE-NEXT: mulhw r4, r3, r4 ; P9LE-NEXT: srwi r5, r4, 31 ; P9LE-NEXT: srawi r4, r4, 11 ; P9LE-NEXT: add r4, r4, r5 ; P9LE-NEXT: mulli r4, r4, 5423 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: xxswapd v3, vs0 -; P9LE-NEXT: mtfprd f0, r3 +; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r4, r3 -; P9LE-NEXT: srawi r4, r4, 15 +; P9LE-NEXT: extsh r3, r3 +; P9LE-NEXT: srawi r4, r3, 15 ; P9LE-NEXT: addze r4, r4 ; P9LE-NEXT: slwi r4, r4, 15 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtfprd f0, r3 -; P9LE-NEXT: xxswapd v2, vs0 -; P9LE-NEXT: vmrglh v3, v4, v3 -; P9LE-NEXT: xxlxor v4, v4, v4 -; P9LE-NEXT: vmrglh v2, v2, v4 +; P9LE-NEXT: mtvsrd v2, r3 +; P9LE-NEXT: li r3, 0 +; P9LE-NEXT: vmrghh v3, v4, v3 +; P9LE-NEXT: mtvsrd v4, r3 +; P9LE-NEXT: vmrghh v2, v2, v4 ; P9LE-NEXT: vmrglw v2, v3, v2 ; P9LE-NEXT: blr ; @@ -1252,42 +1202,40 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) { ; P8LE-NEXT: xxswapd vs0, v2 ; P8LE-NEXT: lis r4, 24749 ; P8LE-NEXT: lis r5, -19946 -; P8LE-NEXT: xxlxor v5, v5, v5 ; P8LE-NEXT: ori r4, r4, 47143 ; P8LE-NEXT: ori r5, r5, 17097 ; P8LE-NEXT: mffprd r3, f0 ; P8LE-NEXT: rldicl r6, r3, 16, 48 ; P8LE-NEXT: rldicl r7, r3, 32, 48 -; P8LE-NEXT: extsh r8, r6 -; P8LE-NEXT: extsh r9, r7 -; P8LE-NEXT: mulhw r4, r8, r4 -; P8LE-NEXT: mulhw r5, r9, r5 +; P8LE-NEXT: extsh r6, r6 +; P8LE-NEXT: extsh r7, r7 +; P8LE-NEXT: mulhw r4, r6, r4 +; P8LE-NEXT: mulhw 
r5, r7, r5 ; P8LE-NEXT: rldicl r3, r3, 48, 48 +; P8LE-NEXT: extsh r3, r3 ; P8LE-NEXT: srwi r8, r4, 31 ; P8LE-NEXT: srawi r4, r4, 11 -; P8LE-NEXT: add r5, r5, r9 +; P8LE-NEXT: add r5, r5, r7 ; P8LE-NEXT: add r4, r4, r8 ; P8LE-NEXT: srwi r8, r5, 31 ; P8LE-NEXT: srawi r5, r5, 4 ; P8LE-NEXT: mulli r4, r4, 5423 ; P8LE-NEXT: add r5, r5, r8 -; P8LE-NEXT: extsh r8, r3 +; P8LE-NEXT: srawi r9, r3, 15 +; P8LE-NEXT: li r8, 0 ; P8LE-NEXT: mulli r5, r5, 23 -; P8LE-NEXT: srawi r8, r8, 15 +; P8LE-NEXT: mtvsrd v2, r8 ; P8LE-NEXT: sub r4, r6, r4 -; P8LE-NEXT: addze r6, r8 -; P8LE-NEXT: mtfprd f0, r4 -; P8LE-NEXT: slwi r4, r6, 15 +; P8LE-NEXT: addze r6, r9 +; P8LE-NEXT: slwi r6, r6, 15 +; P8LE-NEXT: mtvsrd v3, r4 ; P8LE-NEXT: sub r5, r7, r5 -; P8LE-NEXT: sub r3, r3, r4 -; P8LE-NEXT: mtfprd f1, r5 -; P8LE-NEXT: xxswapd v2, vs0 -; P8LE-NEXT: mtfprd f2, r3 -; P8LE-NEXT: xxswapd v3, vs1 -; P8LE-NEXT: xxswapd v4, vs2 -; P8LE-NEXT: vmrglh v2, v2, v3 -; P8LE-NEXT: vmrglh v3, v4, v5 -; P8LE-NEXT: vmrglw v2, v2, v3 +; P8LE-NEXT: sub r3, r3, r6 +; P8LE-NEXT: mtvsrd v4, r5 +; P8LE-NEXT: mtvsrd v5, r3 +; P8LE-NEXT: vmrghh v3, v3, v4 +; P8LE-NEXT: vmrghh v2, v5, v2 +; P8LE-NEXT: vmrglw v2, v3, v2 ; P8LE-NEXT: blr ; ; P8BE-LABEL: dont_fold_urem_i16_smax: diff --git a/llvm/test/CodeGen/PowerPC/swaps-le-5.ll b/llvm/test/CodeGen/PowerPC/swaps-le-5.ll index 323397202c00..95f0fc25f2dd 100644 --- a/llvm/test/CodeGen/PowerPC/swaps-le-5.ll +++ b/llvm/test/CodeGen/PowerPC/swaps-le-5.ll @@ -15,10 +15,10 @@ entry: } ; CHECK-LABEL: @bar0 +; CHECK-DAG: xxswapd 1, 1 ; CHECK-DAG: lxvd2x [[REG1:[0-9]+]] -; CHECK-DAG: xxspltd [[REG2:[0-9]+]] -; CHECK: xxpermdi [[REG3:[0-9]+]], [[REG2]], [[REG1]], 1 -; CHECK: stxvd2x [[REG3]] +; CHECK: xxmrgld [[REG2:[0-9]+]], 1, [[REG1]] +; CHECK: stxvd2x [[REG2]] ; CHECK-NOT: xxswapd define void @bar1(double %y) { @@ -30,10 +30,10 @@ entry: } ; CHECK-LABEL: @bar1 +; CHECK-DAG: xxswapd 1, 1 ; CHECK-DAG: lxvd2x [[REG1:[0-9]+]] -; CHECK-DAG: xxspltd [[REG2:[0-9]+]] -; CHECK: xxmrghd 
[[REG3:[0-9]+]], [[REG1]], [[REG2]] -; CHECK: stxvd2x [[REG3]] +; CHECK: xxpermdi [[REG2:[0-9]+]], [[REG1]], 1, 1 +; CHECK: stxvd2x [[REG2]] ; CHECK-NOT: xxswapd define void @baz0() { diff --git a/llvm/test/CodeGen/PowerPC/swaps-le-6.ll b/llvm/test/CodeGen/PowerPC/swaps-le-6.ll index 23738eaa95a7..4437e6799269 100644 --- a/llvm/test/CodeGen/PowerPC/swaps-le-6.ll +++ b/llvm/test/CodeGen/PowerPC/swaps-le-6.ll @@ -27,7 +27,7 @@ define void @bar0() { ; CHECK: ld r3, .LC0@toc@l(r3) ; CHECK: addis r3, r2, .LC2@toc@ha ; CHECK: ld r3, .LC2@toc@l(r3) -; CHECK: xxpermdi vs0, vs0, vs1, 1 +; CHECK: xxmrgld vs0, vs0, vs1 ; CHECK: stxvd2x vs0, 0, r3 ; CHECK: blr ; @@ -38,7 +38,7 @@ define void @bar0() { ; CHECK-P9-NOVECTOR: addis r3, r2, .LC1@toc@ha ; CHECK-P9-NOVECTOR: addis r3, r2, .LC2@toc@ha ; CHECK-P9-NOVECTOR: ld r3, .LC2@toc@l(r3) -; CHECK-P9-NOVECTOR: xxpermdi vs0, vs1, vs0, 1 +; CHECK-P9-NOVECTOR: xxmrgld vs0, vs1, vs0 ; CHECK-P9-NOVECTOR: stxvd2x vs0, 0, r3 ; CHECK-P9-NOVECTOR: blr ; @@ -72,7 +72,7 @@ define void @bar1() { ; CHECK: ld r3, .LC0@toc@l(r3) ; CHECK: addis r3, r2, .LC2@toc@ha ; CHECK: ld r3, .LC2@toc@l(r3) -; CHECK: xxmrghd vs0, vs1, vs0 +; CHECK: xxpermdi vs0, vs1, vs0, 1 ; CHECK: stxvd2x vs0, 0, r3 ; CHECK: blr ; @@ -83,7 +83,7 @@ define void @bar1() { ; CHECK-P9-NOVECTOR: addis r3, r2, .LC1@toc@ha ; CHECK-P9-NOVECTOR: addis r3, r2, .LC2@toc@ha ; CHECK-P9-NOVECTOR: ld r3, .LC2@toc@l(r3) -; CHECK-P9-NOVECTOR: xxmrghd vs0, vs0, vs1 +; CHECK-P9-NOVECTOR: xxpermdi vs0, vs0, vs1, 1 ; CHECK-P9-NOVECTOR: stxvd2x vs0, 0, r3 ; CHECK-P9-NOVECTOR: blr ; diff --git a/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll b/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll index d853a420dcd8..4bb3730aa043 100644 --- a/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll +++ b/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll @@ -13,53 +13,50 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) { ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: lis r5, 21399 -; 
P9LE-NEXT: ori r5, r5, 33437 -; P9LE-NEXT: clrlwi r4, r3, 16 -; P9LE-NEXT: mulhwu r4, r4, r5 -; P9LE-NEXT: lis r5, 16727 -; P9LE-NEXT: ori r5, r5, 2287 +; P9LE-NEXT: lis r4, 21399 +; P9LE-NEXT: ori r4, r4, 33437 +; P9LE-NEXT: clrlwi r3, r3, 16 +; P9LE-NEXT: mulhwu r4, r3, r4 ; P9LE-NEXT: srwi r4, r4, 5 ; P9LE-NEXT: mulli r4, r4, 98 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtfprd f0, r3 +; P9LE-NEXT: lis r4, 16727 +; P9LE-NEXT: mtvsrd v3, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: clrlwi r4, r3, 16 -; P9LE-NEXT: mulhwu r4, r4, r5 -; P9LE-NEXT: lis r5, 8456 -; P9LE-NEXT: ori r5, r5, 16913 +; P9LE-NEXT: clrlwi r3, r3, 16 +; P9LE-NEXT: ori r4, r4, 2287 +; P9LE-NEXT: mulhwu r4, r3, r4 ; P9LE-NEXT: srwi r4, r4, 8 ; P9LE-NEXT: mulli r4, r4, 1003 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: xxswapd v3, vs0 -; P9LE-NEXT: mtfprd f0, r3 +; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: rlwinm r4, r3, 30, 18, 31 -; P9LE-NEXT: mulhwu r4, r4, r5 -; P9LE-NEXT: lis r5, 22765 -; P9LE-NEXT: ori r5, r5, 8969 -; P9LE-NEXT: srwi r4, r4, 2 -; P9LE-NEXT: mulli r4, r4, 124 -; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtfprd f0, r3 +; P9LE-NEXT: lis r5, 8456 +; P9LE-NEXT: ori r5, r5, 16913 +; P9LE-NEXT: vmrghh v3, v4, v3 +; P9LE-NEXT: clrlwi r4, r3, 16 +; P9LE-NEXT: rlwinm r3, r3, 30, 18, 31 +; P9LE-NEXT: mulhwu r3, r3, r5 +; P9LE-NEXT: srwi r3, r3, 2 +; P9LE-NEXT: mulli r3, r3, 124 +; P9LE-NEXT: sub r3, r4, r3 +; P9LE-NEXT: lis r4, 22765 +; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 0 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: clrlwi r4, r3, 16 -; P9LE-NEXT: mulhwu r5, r4, r5 -; P9LE-NEXT: sub r4, r4, r5 -; P9LE-NEXT: srwi r4, r4, 1 -; P9LE-NEXT: add r4, r4, r5 +; P9LE-NEXT: clrlwi r3, r3, 16 +; P9LE-NEXT: ori r4, r4, 8969 +; P9LE-NEXT: mulhwu r4, r3, r4 +; P9LE-NEXT: sub r5, r3, r4 +; P9LE-NEXT: srwi r5, r5, 1 +; P9LE-NEXT: add r4, r5, r4 ; P9LE-NEXT: srwi r4, r4, 6 ; P9LE-NEXT: 
mulli r4, r4, 95 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: vmrglh v3, v4, v3 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtfprd f0, r3 -; P9LE-NEXT: xxswapd v2, vs0 -; P9LE-NEXT: vmrglh v2, v4, v2 +; P9LE-NEXT: mtvsrd v2, r3 +; P9LE-NEXT: vmrghh v2, v4, v2 ; P9LE-NEXT: vmrglw v2, v3, v2 ; P9LE-NEXT: blr ; @@ -123,50 +120,47 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) { ; P8LE-NEXT: xxswapd vs0, v2 ; P8LE-NEXT: lis r3, 22765 ; P8LE-NEXT: lis r7, 21399 -; P8LE-NEXT: lis r10, 16727 +; P8LE-NEXT: lis r9, 16727 +; P8LE-NEXT: lis r10, 8456 ; P8LE-NEXT: ori r3, r3, 8969 ; P8LE-NEXT: ori r7, r7, 33437 -; P8LE-NEXT: ori r10, r10, 2287 +; P8LE-NEXT: ori r9, r9, 2287 +; P8LE-NEXT: ori r10, r10, 16913 ; P8LE-NEXT: mffprd r4, f0 ; P8LE-NEXT: clrldi r6, r4, 48 ; P8LE-NEXT: rldicl r5, r4, 32, 48 -; P8LE-NEXT: clrlwi r9, r6, 16 +; P8LE-NEXT: clrlwi r6, r6, 16 ; P8LE-NEXT: rldicl r8, r4, 16, 48 -; P8LE-NEXT: clrlwi r11, r5, 16 -; P8LE-NEXT: mulhwu r3, r9, r3 -; P8LE-NEXT: clrlwi r12, r8, 16 -; P8LE-NEXT: mulhwu r7, r11, r7 -; P8LE-NEXT: lis r11, 8456 +; P8LE-NEXT: clrlwi r5, r5, 16 +; P8LE-NEXT: mulhwu r3, r6, r3 ; P8LE-NEXT: rldicl r4, r4, 48, 48 -; P8LE-NEXT: mulhwu r10, r12, r10 -; P8LE-NEXT: ori r11, r11, 16913 -; P8LE-NEXT: rlwinm r12, r4, 30, 18, 31 -; P8LE-NEXT: mulhwu r11, r12, r11 -; P8LE-NEXT: sub r9, r9, r3 -; P8LE-NEXT: srwi r9, r9, 1 +; P8LE-NEXT: clrlwi r8, r8, 16 +; P8LE-NEXT: rlwinm r11, r4, 30, 18, 31 +; P8LE-NEXT: mulhwu r7, r5, r7 +; P8LE-NEXT: clrlwi r4, r4, 16 +; P8LE-NEXT: mulhwu r9, r8, r9 +; P8LE-NEXT: mulhwu r10, r11, r10 +; P8LE-NEXT: sub r11, r6, r3 +; P8LE-NEXT: srwi r11, r11, 1 ; P8LE-NEXT: srwi r7, r7, 5 -; P8LE-NEXT: add r3, r9, r3 -; P8LE-NEXT: srwi r9, r10, 8 +; P8LE-NEXT: add r3, r11, r3 +; P8LE-NEXT: srwi r9, r9, 8 +; P8LE-NEXT: srwi r10, r10, 2 ; P8LE-NEXT: srwi r3, r3, 6 ; P8LE-NEXT: mulli r7, r7, 98 -; P8LE-NEXT: srwi r10, r11, 2 ; P8LE-NEXT: mulli r9, r9, 1003 ; P8LE-NEXT: mulli r3, r3, 95 ; P8LE-NEXT: mulli r10, r10, 124 ; P8LE-NEXT: 
sub r5, r5, r7 ; P8LE-NEXT: sub r7, r8, r9 -; P8LE-NEXT: mtfprd f0, r5 ; P8LE-NEXT: sub r3, r6, r3 +; P8LE-NEXT: mtvsrd v2, r5 ; P8LE-NEXT: sub r4, r4, r10 -; P8LE-NEXT: mtfprd f1, r7 -; P8LE-NEXT: mtfprd f2, r3 -; P8LE-NEXT: xxswapd v2, vs0 -; P8LE-NEXT: mtfprd f3, r4 -; P8LE-NEXT: xxswapd v3, vs1 -; P8LE-NEXT: xxswapd v4, vs2 -; P8LE-NEXT: xxswapd v5, vs3 -; P8LE-NEXT: vmrglh v2, v3, v2 -; P8LE-NEXT: vmrglh v3, v5, v4 +; P8LE-NEXT: mtvsrd v3, r7 +; P8LE-NEXT: mtvsrd v4, r3 +; P8LE-NEXT: mtvsrd v5, r4 +; P8LE-NEXT: vmrghh v2, v3, v2 +; P8LE-NEXT: vmrghh v3, v5, v4 ; P8LE-NEXT: vmrglw v2, v2, v3 ; P8LE-NEXT: blr ; @@ -230,56 +224,52 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) { ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 0 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: lis r5, 22765 -; P9LE-NEXT: ori r5, r5, 8969 -; P9LE-NEXT: clrlwi r4, r3, 16 -; P9LE-NEXT: mulhwu r6, r4, r5 -; P9LE-NEXT: sub r4, r4, r6 -; P9LE-NEXT: srwi r4, r4, 1 -; P9LE-NEXT: add r4, r4, r6 -; P9LE-NEXT: srwi r4, r4, 6 -; P9LE-NEXT: mulli r4, r4, 95 -; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtfprd f0, r3 +; P9LE-NEXT: lis r4, 22765 +; P9LE-NEXT: ori r4, r4, 8969 +; P9LE-NEXT: clrlwi r3, r3, 16 +; P9LE-NEXT: mulhwu r5, r3, r4 +; P9LE-NEXT: sub r6, r3, r5 +; P9LE-NEXT: srwi r6, r6, 1 +; P9LE-NEXT: add r5, r6, r5 +; P9LE-NEXT: srwi r5, r5, 6 +; P9LE-NEXT: mulli r5, r5, 95 +; P9LE-NEXT: sub r3, r3, r5 +; P9LE-NEXT: mtvsrd v3, r3 ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: clrlwi r4, r3, 16 -; P9LE-NEXT: mulhwu r6, r4, r5 -; P9LE-NEXT: sub r4, r4, r6 -; P9LE-NEXT: srwi r4, r4, 1 -; P9LE-NEXT: add r4, r4, r6 -; P9LE-NEXT: srwi r4, r4, 6 -; P9LE-NEXT: mulli r4, r4, 95 -; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: xxswapd v3, vs0 -; P9LE-NEXT: mtfprd f0, r3 +; P9LE-NEXT: clrlwi r3, r3, 16 +; P9LE-NEXT: mulhwu r5, r3, r4 +; P9LE-NEXT: sub r6, r3, r5 +; P9LE-NEXT: srwi r6, r6, 1 +; P9LE-NEXT: add r5, r6, r5 +; P9LE-NEXT: srwi r5, r5, 6 +; P9LE-NEXT: mulli r5, r5, 95 +; P9LE-NEXT: 
sub r3, r3, r5 +; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: clrlwi r4, r3, 16 -; P9LE-NEXT: mulhwu r6, r4, r5 -; P9LE-NEXT: sub r4, r4, r6 -; P9LE-NEXT: srwi r4, r4, 1 -; P9LE-NEXT: add r4, r4, r6 -; P9LE-NEXT: srwi r4, r4, 6 -; P9LE-NEXT: mulli r4, r4, 95 -; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtfprd f0, r3 +; P9LE-NEXT: clrlwi r3, r3, 16 +; P9LE-NEXT: mulhwu r5, r3, r4 +; P9LE-NEXT: sub r6, r3, r5 +; P9LE-NEXT: srwi r6, r6, 1 +; P9LE-NEXT: add r5, r6, r5 +; P9LE-NEXT: srwi r5, r5, 6 +; P9LE-NEXT: mulli r5, r5, 95 +; P9LE-NEXT: sub r3, r3, r5 +; P9LE-NEXT: vmrghh v3, v4, v3 +; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: clrlwi r4, r3, 16 -; P9LE-NEXT: mulhwu r5, r4, r5 -; P9LE-NEXT: sub r4, r4, r5 -; P9LE-NEXT: srwi r4, r4, 1 -; P9LE-NEXT: add r4, r4, r5 +; P9LE-NEXT: clrlwi r3, r3, 16 +; P9LE-NEXT: mulhwu r4, r3, r4 +; P9LE-NEXT: sub r5, r3, r4 +; P9LE-NEXT: srwi r5, r5, 1 +; P9LE-NEXT: add r4, r5, r4 ; P9LE-NEXT: srwi r4, r4, 6 ; P9LE-NEXT: mulli r4, r4, 95 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: vmrglh v3, v4, v3 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtfprd f0, r3 -; P9LE-NEXT: xxswapd v2, vs0 -; P9LE-NEXT: vmrglh v2, v2, v4 +; P9LE-NEXT: mtvsrd v2, r3 +; P9LE-NEXT: vmrghh v2, v2, v4 ; P9LE-NEXT: vmrglw v2, v2, v3 ; P9LE-NEXT: blr ; @@ -344,36 +334,34 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) { ; P8LE: # %bb.0: ; P8LE-NEXT: xxswapd vs0, v2 ; P8LE-NEXT: lis r3, 22765 -; P8LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; P8LE-NEXT: ori r3, r3, 8969 ; P8LE-NEXT: mffprd r4, f0 ; P8LE-NEXT: clrldi r5, r4, 48 ; P8LE-NEXT: rldicl r6, r4, 48, 48 -; P8LE-NEXT: clrlwi r8, r5, 16 +; P8LE-NEXT: clrlwi r5, r5, 16 ; P8LE-NEXT: rldicl r7, r4, 32, 48 -; P8LE-NEXT: clrlwi r9, r6, 16 +; P8LE-NEXT: clrlwi r6, r6, 16 +; P8LE-NEXT: mulhwu r8, r5, r3 ; P8LE-NEXT: rldicl r4, r4, 16, 48 -; P8LE-NEXT: mulhwu r10, r8, r3 -; P8LE-NEXT: 
clrlwi r11, r7, 16 -; P8LE-NEXT: clrlwi r0, r4, 16 -; P8LE-NEXT: mulhwu r12, r9, r3 -; P8LE-NEXT: mulhwu r30, r11, r3 -; P8LE-NEXT: mulhwu r3, r0, r3 -; P8LE-NEXT: sub r8, r8, r10 -; P8LE-NEXT: srwi r8, r8, 1 -; P8LE-NEXT: sub r9, r9, r12 -; P8LE-NEXT: add r8, r8, r10 -; P8LE-NEXT: sub r10, r11, r30 -; P8LE-NEXT: sub r11, r0, r3 -; P8LE-NEXT: srwi r9, r9, 1 -; P8LE-NEXT: srwi r10, r10, 1 +; P8LE-NEXT: clrlwi r7, r7, 16 +; P8LE-NEXT: mulhwu r9, r6, r3 +; P8LE-NEXT: clrlwi r4, r4, 16 +; P8LE-NEXT: mulhwu r10, r7, r3 +; P8LE-NEXT: mulhwu r3, r4, r3 +; P8LE-NEXT: sub r11, r5, r8 +; P8LE-NEXT: sub r12, r6, r9 +; P8LE-NEXT: srwi r11, r11, 1 +; P8LE-NEXT: add r8, r11, r8 +; P8LE-NEXT: sub r11, r7, r10 +; P8LE-NEXT: srwi r12, r12, 1 +; P8LE-NEXT: add r9, r12, r9 +; P8LE-NEXT: sub r12, r4, r3 ; P8LE-NEXT: srwi r11, r11, 1 -; P8LE-NEXT: add r9, r9, r12 ; P8LE-NEXT: srwi r8, r8, 6 -; P8LE-NEXT: add r10, r10, r30 -; P8LE-NEXT: add r3, r11, r3 +; P8LE-NEXT: add r10, r11, r10 +; P8LE-NEXT: srwi r11, r12, 1 ; P8LE-NEXT: srwi r9, r9, 6 -; P8LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; P8LE-NEXT: add r3, r11, r3 ; P8LE-NEXT: mulli r8, r8, 95 ; P8LE-NEXT: srwi r10, r10, 6 ; P8LE-NEXT: srwi r3, r3, 6 @@ -382,18 +370,14 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) { ; P8LE-NEXT: mulli r3, r3, 95 ; P8LE-NEXT: sub r5, r5, r8 ; P8LE-NEXT: sub r6, r6, r9 -; P8LE-NEXT: mtfprd f0, r5 +; P8LE-NEXT: mtvsrd v2, r5 ; P8LE-NEXT: sub r5, r7, r10 ; P8LE-NEXT: sub r3, r4, r3 -; P8LE-NEXT: mtfprd f1, r6 -; P8LE-NEXT: mtfprd f2, r5 -; P8LE-NEXT: xxswapd v2, vs0 -; P8LE-NEXT: mtfprd f3, r3 -; P8LE-NEXT: xxswapd v3, vs1 -; P8LE-NEXT: xxswapd v4, vs2 -; P8LE-NEXT: xxswapd v5, vs3 -; P8LE-NEXT: vmrglh v2, v3, v2 -; P8LE-NEXT: vmrglh v3, v5, v4 +; P8LE-NEXT: mtvsrd v3, r6 +; P8LE-NEXT: mtvsrd v4, r5 +; P8LE-NEXT: mtvsrd v5, r3 +; P8LE-NEXT: vmrghh v2, v3, v2 +; P8LE-NEXT: vmrghh v3, v5, v4 ; P8LE-NEXT: vmrglw v2, v3, v2 ; P8LE-NEXT: blr ; @@ -461,67 +445,59 @@ define <4 x i16> 
@combine_urem_udiv(<4 x i16> %x) { ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 0 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: lis r5, 22765 -; P9LE-NEXT: ori r5, r5, 8969 -; P9LE-NEXT: clrlwi r4, r3, 16 -; P9LE-NEXT: mulhwu r6, r4, r5 -; P9LE-NEXT: sub r4, r4, r6 -; P9LE-NEXT: srwi r4, r4, 1 -; P9LE-NEXT: add r4, r4, r6 -; P9LE-NEXT: srwi r4, r4, 6 -; P9LE-NEXT: mulli r6, r4, 95 +; P9LE-NEXT: lis r4, 22765 +; P9LE-NEXT: ori r4, r4, 8969 +; P9LE-NEXT: clrlwi r3, r3, 16 +; P9LE-NEXT: mulhwu r5, r3, r4 +; P9LE-NEXT: sub r6, r3, r5 +; P9LE-NEXT: srwi r6, r6, 1 +; P9LE-NEXT: add r5, r6, r5 +; P9LE-NEXT: srwi r5, r5, 6 +; P9LE-NEXT: mulli r6, r5, 95 ; P9LE-NEXT: sub r3, r3, r6 -; P9LE-NEXT: mtfprd f0, r3 +; P9LE-NEXT: mtvsrd v3, r3 ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: clrlwi r6, r3, 16 -; P9LE-NEXT: mulhwu r7, r6, r5 +; P9LE-NEXT: mulhwu r7, r6, r4 ; P9LE-NEXT: sub r6, r6, r7 ; P9LE-NEXT: srwi r6, r6, 1 ; P9LE-NEXT: add r6, r6, r7 ; P9LE-NEXT: srwi r6, r6, 6 ; P9LE-NEXT: mulli r7, r6, 95 ; P9LE-NEXT: sub r3, r3, r7 -; P9LE-NEXT: xxswapd v3, vs0 -; P9LE-NEXT: mtfprd f0, r3 +; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: clrlwi r7, r3, 16 -; P9LE-NEXT: mulhwu r8, r7, r5 +; P9LE-NEXT: mulhwu r8, r7, r4 ; P9LE-NEXT: sub r7, r7, r8 ; P9LE-NEXT: srwi r7, r7, 1 ; P9LE-NEXT: add r7, r7, r8 ; P9LE-NEXT: srwi r7, r7, 6 ; P9LE-NEXT: mulli r8, r7, 95 ; P9LE-NEXT: sub r3, r3, r8 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtfprd f0, r3 +; P9LE-NEXT: vmrghh v3, v4, v3 +; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: clrlwi r8, r3, 16 -; P9LE-NEXT: mulhwu r5, r8, r5 -; P9LE-NEXT: sub r8, r8, r5 +; P9LE-NEXT: mulhwu r4, r8, r4 +; P9LE-NEXT: sub r8, r8, r4 ; P9LE-NEXT: srwi r8, r8, 1 -; P9LE-NEXT: add r5, r8, r5 -; P9LE-NEXT: srwi r5, r5, 6 -; P9LE-NEXT: mulli r8, r5, 95 +; P9LE-NEXT: add r4, r8, r4 +; P9LE-NEXT: srwi r4, r4, 6 +; P9LE-NEXT: mulli r8, r4, 95 ; 
P9LE-NEXT: sub r3, r3, r8 -; P9LE-NEXT: vmrglh v3, v4, v3 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtfprd f0, r3 -; P9LE-NEXT: xxswapd v2, vs0 -; P9LE-NEXT: mtfprd f0, r4 -; P9LE-NEXT: vmrglh v2, v2, v4 +; P9LE-NEXT: mtvsrd v2, r3 +; P9LE-NEXT: vmrghh v2, v2, v4 +; P9LE-NEXT: mtvsrd v4, r6 ; P9LE-NEXT: vmrglw v2, v2, v3 -; P9LE-NEXT: xxswapd v3, vs0 -; P9LE-NEXT: mtfprd f0, r6 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtfprd f0, r7 -; P9LE-NEXT: vmrglh v3, v4, v3 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtfprd f0, r5 -; P9LE-NEXT: xxswapd v5, vs0 -; P9LE-NEXT: vmrglh v4, v5, v4 +; P9LE-NEXT: mtvsrd v3, r5 +; P9LE-NEXT: vmrghh v3, v4, v3 +; P9LE-NEXT: mtvsrd v4, r7 +; P9LE-NEXT: mtvsrd v5, r4 +; P9LE-NEXT: vmrghh v4, v5, v4 ; P9LE-NEXT: vmrglw v3, v4, v3 ; P9LE-NEXT: vadduhm v2, v2, v3 ; P9LE-NEXT: blr @@ -598,69 +574,61 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) { ; P8LE-LABEL: combine_urem_udiv: ; P8LE: # %bb.0: ; P8LE-NEXT: xxswapd vs0, v2 -; P8LE-NEXT: lis r4, 22765 +; P8LE-NEXT: lis r3, 22765 ; P8LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; P8LE-NEXT: ori r4, r4, 8969 -; P8LE-NEXT: mffprd r5, f0 -; P8LE-NEXT: clrldi r3, r5, 48 -; P8LE-NEXT: rldicl r6, r5, 48, 48 -; P8LE-NEXT: clrlwi r8, r3, 16 -; P8LE-NEXT: rldicl r7, r5, 32, 48 -; P8LE-NEXT: clrlwi r9, r6, 16 -; P8LE-NEXT: mulhwu r10, r8, r4 -; P8LE-NEXT: clrlwi r11, r7, 16 -; P8LE-NEXT: rldicl r5, r5, 16, 48 -; P8LE-NEXT: mulhwu r12, r9, r4 -; P8LE-NEXT: mulhwu r0, r11, r4 -; P8LE-NEXT: clrlwi r30, r5, 16 -; P8LE-NEXT: mulhwu r4, r30, r4 -; P8LE-NEXT: sub r8, r8, r10 +; P8LE-NEXT: ori r3, r3, 8969 +; P8LE-NEXT: mffprd r4, f0 +; P8LE-NEXT: clrldi r5, r4, 48 +; P8LE-NEXT: rldicl r6, r4, 48, 48 +; P8LE-NEXT: clrlwi r5, r5, 16 +; P8LE-NEXT: clrlwi r8, r6, 16 +; P8LE-NEXT: rldicl r7, r4, 32, 48 +; P8LE-NEXT: rldicl r4, r4, 16, 48 +; P8LE-NEXT: mulhwu r9, r5, r3 +; P8LE-NEXT: mulhwu r11, r8, r3 +; P8LE-NEXT: clrlwi r10, r7, 16 +; P8LE-NEXT: clrlwi r12, r4, 16 +; P8LE-NEXT: mulhwu r0, r10, r3 
+; P8LE-NEXT: mulhwu r3, r12, r3 +; P8LE-NEXT: sub r30, r5, r9 +; P8LE-NEXT: sub r8, r8, r11 +; P8LE-NEXT: srwi r30, r30, 1 ; P8LE-NEXT: srwi r8, r8, 1 -; P8LE-NEXT: sub r9, r9, r12 -; P8LE-NEXT: add r8, r8, r10 -; P8LE-NEXT: sub r10, r11, r0 -; P8LE-NEXT: srwi r9, r9, 1 +; P8LE-NEXT: sub r10, r10, r0 +; P8LE-NEXT: add r9, r30, r9 +; P8LE-NEXT: add r8, r8, r11 +; P8LE-NEXT: sub r11, r12, r3 ; P8LE-NEXT: srwi r10, r10, 1 -; P8LE-NEXT: sub r11, r30, r4 -; P8LE-NEXT: add r9, r9, r12 -; P8LE-NEXT: srwi r8, r8, 6 ; P8LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; P8LE-NEXT: add r10, r10, r0 -; P8LE-NEXT: srwi r11, r11, 1 ; P8LE-NEXT: srwi r9, r9, 6 -; P8LE-NEXT: mtfprd f0, r8 -; P8LE-NEXT: mulli r12, r8, 95 +; P8LE-NEXT: srwi r11, r11, 1 +; P8LE-NEXT: srwi r8, r8, 6 +; P8LE-NEXT: add r10, r10, r0 +; P8LE-NEXT: mulli r12, r9, 95 +; P8LE-NEXT: add r3, r11, r3 +; P8LE-NEXT: mtvsrd v2, r9 ; P8LE-NEXT: srwi r10, r10, 6 -; P8LE-NEXT: add r4, r11, r4 -; P8LE-NEXT: mtfprd f1, r9 -; P8LE-NEXT: mulli r8, r9, 95 -; P8LE-NEXT: mulli r9, r10, 95 -; P8LE-NEXT: srwi r4, r4, 6 -; P8LE-NEXT: xxswapd v2, vs0 -; P8LE-NEXT: mtfprd f2, r10 -; P8LE-NEXT: mtfprd f3, r4 -; P8LE-NEXT: mulli r4, r4, 95 -; P8LE-NEXT: xxswapd v3, vs1 -; P8LE-NEXT: xxswapd v1, vs2 -; P8LE-NEXT: sub r3, r3, r12 -; P8LE-NEXT: xxswapd v6, vs3 -; P8LE-NEXT: mtfprd f0, r3 -; P8LE-NEXT: sub r3, r7, r9 -; P8LE-NEXT: sub r6, r6, r8 -; P8LE-NEXT: mtfprd f4, r3 -; P8LE-NEXT: sub r3, r5, r4 -; P8LE-NEXT: mtfprd f1, r6 -; P8LE-NEXT: mtfprd f5, r3 -; P8LE-NEXT: xxswapd v5, vs4 -; P8LE-NEXT: vmrglh v2, v3, v2 -; P8LE-NEXT: xxswapd v3, vs0 -; P8LE-NEXT: xxswapd v4, vs1 -; P8LE-NEXT: xxswapd v0, vs5 -; P8LE-NEXT: vmrglh v3, v4, v3 -; P8LE-NEXT: vmrglh v4, v0, v5 -; P8LE-NEXT: vmrglh v5, v6, v1 -; P8LE-NEXT: vmrglw v3, v4, v3 -; P8LE-NEXT: vmrglw v2, v5, v2 +; P8LE-NEXT: mulli r9, r8, 95 +; P8LE-NEXT: srwi r3, r3, 6 +; P8LE-NEXT: mtvsrd v3, r8 +; P8LE-NEXT: mulli r8, r10, 95 +; P8LE-NEXT: mtvsrd v4, r10 +; P8LE-NEXT: mulli r10, 
r3, 95 +; P8LE-NEXT: vmrghh v2, v3, v2 +; P8LE-NEXT: sub r5, r5, r12 +; P8LE-NEXT: sub r6, r6, r9 +; P8LE-NEXT: mtvsrd v3, r5 +; P8LE-NEXT: mtvsrd v5, r6 +; P8LE-NEXT: sub r5, r7, r8 +; P8LE-NEXT: sub r4, r4, r10 +; P8LE-NEXT: mtvsrd v0, r5 +; P8LE-NEXT: mtvsrd v1, r4 +; P8LE-NEXT: vmrghh v3, v5, v3 +; P8LE-NEXT: mtvsrd v5, r3 +; P8LE-NEXT: vmrghh v0, v1, v0 +; P8LE-NEXT: vmrghh v4, v5, v4 +; P8LE-NEXT: vmrglw v3, v0, v3 +; P8LE-NEXT: vmrglw v2, v4, v2 ; P8LE-NEXT: vadduhm v2, v3, v2 ; P8LE-NEXT: blr ; @@ -742,34 +710,30 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) { ; P9LE-NEXT: li r3, 0 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: clrlwi r3, r3, 26 -; P9LE-NEXT: mtfprd f0, r3 +; P9LE-NEXT: mtvsrd v3, r3 ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: clrlwi r3, r3, 27 -; P9LE-NEXT: xxswapd v3, vs0 -; P9LE-NEXT: mtfprd f0, r3 +; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: lis r5, 22765 -; P9LE-NEXT: ori r5, r5, 8969 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: clrlwi r4, r3, 16 -; P9LE-NEXT: mulhwu r5, r4, r5 -; P9LE-NEXT: sub r4, r4, r5 -; P9LE-NEXT: srwi r4, r4, 1 -; P9LE-NEXT: add r4, r4, r5 +; P9LE-NEXT: lis r4, 22765 +; P9LE-NEXT: ori r4, r4, 8969 +; P9LE-NEXT: vmrghh v3, v4, v3 +; P9LE-NEXT: clrlwi r3, r3, 16 +; P9LE-NEXT: mulhwu r4, r3, r4 +; P9LE-NEXT: sub r5, r3, r4 +; P9LE-NEXT: srwi r5, r5, 1 +; P9LE-NEXT: add r4, r5, r4 ; P9LE-NEXT: srwi r4, r4, 6 ; P9LE-NEXT: mulli r4, r4, 95 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtfprd f0, r3 +; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: clrlwi r3, r3, 29 -; P9LE-NEXT: vmrglh v3, v4, v3 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtfprd f0, r3 -; P9LE-NEXT: xxswapd v2, vs0 -; P9LE-NEXT: vmrglh v2, v4, v2 +; P9LE-NEXT: mtvsrd v2, r3 +; P9LE-NEXT: vmrghh v2, v4, v2 ; P9LE-NEXT: vmrglw v2, v2, v3 ; P9LE-NEXT: blr ; @@ -817,9 +781,9 @@ define <4 x i16> 
@dont_fold_urem_power_of_two(<4 x i16> %x) { ; P8LE-NEXT: mffprd r4, f0 ; P8LE-NEXT: rldicl r5, r4, 16, 48 ; P8LE-NEXT: rldicl r7, r4, 48, 48 -; P8LE-NEXT: clrlwi r6, r5, 16 -; P8LE-NEXT: mulhwu r3, r6, r3 -; P8LE-NEXT: sub r6, r6, r3 +; P8LE-NEXT: clrlwi r5, r5, 16 +; P8LE-NEXT: mulhwu r3, r5, r3 +; P8LE-NEXT: sub r6, r5, r3 ; P8LE-NEXT: srwi r6, r6, 1 ; P8LE-NEXT: add r3, r6, r3 ; P8LE-NEXT: clrldi r6, r4, 48 @@ -827,19 +791,15 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) { ; P8LE-NEXT: clrlwi r6, r6, 26 ; P8LE-NEXT: mulli r3, r3, 95 ; P8LE-NEXT: rldicl r4, r4, 32, 48 -; P8LE-NEXT: mtfprd f0, r6 +; P8LE-NEXT: mtvsrd v2, r6 ; P8LE-NEXT: clrlwi r6, r7, 27 ; P8LE-NEXT: clrlwi r4, r4, 29 -; P8LE-NEXT: mtfprd f1, r6 -; P8LE-NEXT: mtfprd f3, r4 -; P8LE-NEXT: xxswapd v2, vs0 -; P8LE-NEXT: xxswapd v3, vs1 +; P8LE-NEXT: mtvsrd v3, r6 +; P8LE-NEXT: mtvsrd v5, r4 +; P8LE-NEXT: vmrghh v2, v3, v2 ; P8LE-NEXT: sub r3, r5, r3 -; P8LE-NEXT: xxswapd v5, vs3 -; P8LE-NEXT: mtfprd f2, r3 -; P8LE-NEXT: vmrglh v2, v3, v2 -; P8LE-NEXT: xxswapd v4, vs2 -; P8LE-NEXT: vmrglh v3, v4, v5 +; P8LE-NEXT: mtvsrd v4, r3 +; P8LE-NEXT: vmrghh v3, v4, v5 ; P8LE-NEXT: vmrglw v2, v3, v2 ; P8LE-NEXT: blr ; @@ -885,40 +845,39 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) { ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: lis r5, -19946 -; P9LE-NEXT: ori r5, r5, 17097 -; P9LE-NEXT: clrlwi r4, r3, 16 -; P9LE-NEXT: mulhwu r4, r4, r5 -; P9LE-NEXT: lis r5, 24749 -; P9LE-NEXT: ori r5, r5, 47143 +; P9LE-NEXT: lis r4, -19946 +; P9LE-NEXT: ori r4, r4, 17097 +; P9LE-NEXT: clrlwi r3, r3, 16 +; P9LE-NEXT: mulhwu r4, r3, r4 ; P9LE-NEXT: srwi r4, r4, 4 ; P9LE-NEXT: mulli r4, r4, 23 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtfprd f0, r3 +; P9LE-NEXT: lis r4, 24749 +; P9LE-NEXT: mtvsrd v3, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: clrlwi r4, r3, 16 -; P9LE-NEXT: mulhwu r4, r4, r5 -; P9LE-NEXT: lis r5, -14230 -; P9LE-NEXT: ori 
r5, r5, 30865 +; P9LE-NEXT: clrlwi r3, r3, 16 +; P9LE-NEXT: ori r4, r4, 47143 +; P9LE-NEXT: mulhwu r4, r3, r4 ; P9LE-NEXT: srwi r4, r4, 11 ; P9LE-NEXT: mulli r4, r4, 5423 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: xxswapd v3, vs0 -; P9LE-NEXT: mtfprd f0, r3 +; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: rlwinm r4, r3, 31, 17, 31 -; P9LE-NEXT: mulhwu r4, r4, r5 -; P9LE-NEXT: srwi r4, r4, 8 -; P9LE-NEXT: mulli r4, r4, 654 -; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtfprd f0, r3 -; P9LE-NEXT: xxswapd v2, vs0 -; P9LE-NEXT: vmrglh v3, v4, v3 -; P9LE-NEXT: xxlxor v4, v4, v4 -; P9LE-NEXT: vmrglh v2, v2, v4 +; P9LE-NEXT: lis r5, -14230 +; P9LE-NEXT: ori r5, r5, 30865 +; P9LE-NEXT: vmrghh v3, v4, v3 +; P9LE-NEXT: clrlwi r4, r3, 16 +; P9LE-NEXT: rlwinm r3, r3, 31, 17, 31 +; P9LE-NEXT: mulhwu r3, r3, r5 +; P9LE-NEXT: srwi r3, r3, 8 +; P9LE-NEXT: mulli r3, r3, 654 +; P9LE-NEXT: sub r3, r4, r3 +; P9LE-NEXT: mtvsrd v2, r3 +; P9LE-NEXT: li r3, 0 +; P9LE-NEXT: mtvsrd v4, r3 +; P9LE-NEXT: vmrghh v2, v2, v4 ; P9LE-NEXT: vmrglw v2, v3, v2 ; P9LE-NEXT: blr ; @@ -969,41 +928,40 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) { ; P8LE-LABEL: dont_fold_urem_one: ; P8LE: # %bb.0: ; P8LE-NEXT: xxswapd vs0, v2 -; P8LE-NEXT: lis r3, -19946 -; P8LE-NEXT: lis r7, 24749 -; P8LE-NEXT: lis r9, -14230 -; P8LE-NEXT: xxlxor v5, v5, v5 -; P8LE-NEXT: ori r3, r3, 17097 -; P8LE-NEXT: ori r7, r7, 47143 -; P8LE-NEXT: ori r9, r9, 30865 +; P8LE-NEXT: lis r3, -14230 +; P8LE-NEXT: lis r7, -19946 +; P8LE-NEXT: lis r9, 24749 +; P8LE-NEXT: ori r3, r3, 30865 +; P8LE-NEXT: ori r7, r7, 17097 ; P8LE-NEXT: mffprd r4, f0 -; P8LE-NEXT: rldicl r5, r4, 32, 48 -; P8LE-NEXT: rldicl r6, r4, 16, 48 -; P8LE-NEXT: clrlwi r8, r5, 16 -; P8LE-NEXT: rldicl r4, r4, 48, 48 +; P8LE-NEXT: rldicl r5, r4, 48, 48 +; P8LE-NEXT: rldicl r6, r4, 32, 48 +; P8LE-NEXT: rldicl r4, r4, 16, 48 +; P8LE-NEXT: rlwinm r8, r5, 31, 17, 31 +; P8LE-NEXT: clrlwi r6, r6, 16 
+; P8LE-NEXT: clrlwi r5, r5, 16 ; P8LE-NEXT: mulhwu r3, r8, r3 -; P8LE-NEXT: clrlwi r8, r6, 16 -; P8LE-NEXT: mulhwu r7, r8, r7 -; P8LE-NEXT: rlwinm r8, r4, 31, 17, 31 -; P8LE-NEXT: mulhwu r8, r8, r9 -; P8LE-NEXT: srwi r3, r3, 4 -; P8LE-NEXT: srwi r7, r7, 11 -; P8LE-NEXT: mulli r3, r3, 23 -; P8LE-NEXT: srwi r8, r8, 8 -; P8LE-NEXT: mulli r7, r7, 5423 -; P8LE-NEXT: mulli r8, r8, 654 +; P8LE-NEXT: ori r8, r9, 47143 +; P8LE-NEXT: clrlwi r4, r4, 16 +; P8LE-NEXT: li r9, 0 +; P8LE-NEXT: mulhwu r7, r6, r7 +; P8LE-NEXT: mulhwu r8, r4, r8 +; P8LE-NEXT: mtvsrd v2, r9 +; P8LE-NEXT: srwi r3, r3, 8 +; P8LE-NEXT: srwi r7, r7, 4 +; P8LE-NEXT: mulli r3, r3, 654 +; P8LE-NEXT: srwi r8, r8, 11 +; P8LE-NEXT: mulli r7, r7, 23 +; P8LE-NEXT: mulli r8, r8, 5423 ; P8LE-NEXT: sub r3, r5, r3 ; P8LE-NEXT: sub r5, r6, r7 -; P8LE-NEXT: mtfprd f0, r3 +; P8LE-NEXT: mtvsrd v3, r3 ; P8LE-NEXT: sub r3, r4, r8 -; P8LE-NEXT: mtfprd f1, r5 -; P8LE-NEXT: mtfprd f2, r3 -; P8LE-NEXT: xxswapd v2, vs0 -; P8LE-NEXT: xxswapd v3, vs1 -; P8LE-NEXT: xxswapd v4, vs2 -; P8LE-NEXT: vmrglh v2, v3, v2 -; P8LE-NEXT: vmrglh v3, v4, v5 -; P8LE-NEXT: vmrglw v2, v2, v3 +; P8LE-NEXT: mtvsrd v4, r5 +; P8LE-NEXT: mtvsrd v5, r3 +; P8LE-NEXT: vmrghh v2, v3, v2 +; P8LE-NEXT: vmrghh v3, v5, v4 +; P8LE-NEXT: vmrglw v2, v3, v2 ; P8LE-NEXT: blr ; ; P8BE-LABEL: dont_fold_urem_one: diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll index 239b38e2ec70..48b62f57c1c9 100644 --- a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll @@ -20,12 +20,10 @@ define i32 @test2elt(i64 %a.coerce) local_unnamed_addr #0 { ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: mtvsrd v3, r4 ; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: mtfprd f1, r4 -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: xxswapd v3, vs1 -; CHECK-P8-NEXT: xxswapd v2, vs0 
-; CHECK-P8-NEXT: vmrglh v2, v3, v2 +; CHECK-P8-NEXT: mtvsrd v2, r3 +; CHECK-P8-NEXT: vmrghh v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mffprwz r3, f0 ; CHECK-P8-NEXT: blr @@ -40,13 +38,11 @@ define i32 @test2elt(i64 %a.coerce) local_unnamed_addr #0 { ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtfprd f1, r3 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 -; CHECK-P9-NEXT: xxswapd v2, vs1 -; CHECK-P9-NEXT: xxswapd v3, vs0 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: li r3, 0 +; CHECK-P9-NEXT: vmrghh v2, v3, v2 ; CHECK-P9-NEXT: vextuwrx r3, r3, v2 ; CHECK-P9-NEXT: blr ; @@ -90,20 +86,16 @@ define i64 @test4elt(<4 x float> %a) local_unnamed_addr #1 { ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: mtfprd f1, r3 +; CHECK-P8-NEXT: mtvsrd v2, r3 ; CHECK-P8-NEXT: mffprwz r3, f0 ; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: xxswapd v4, vs1 -; CHECK-P8-NEXT: mtfprd f0, r3 +; CHECK-P8-NEXT: mtvsrd v3, r3 ; CHECK-P8-NEXT: mffprwz r3, f3 -; CHECK-P8-NEXT: mtfprd f2, r4 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: mtfprd f3, r3 -; CHECK-P8-NEXT: xxswapd v3, vs2 -; CHECK-P8-NEXT: xxswapd v5, vs3 -; CHECK-P8-NEXT: vmrglh v2, v3, v2 -; CHECK-P8-NEXT: vmrglh v3, v4, v5 -; CHECK-P8-NEXT: vmrglw v2, v3, v2 +; CHECK-P8-NEXT: mtvsrd v4, r4 +; CHECK-P8-NEXT: mtvsrd v5, r3 +; CHECK-P8-NEXT: vmrghh v3, v4, v3 +; CHECK-P8-NEXT: vmrghh v2, v2, v5 +; CHECK-P8-NEXT: vmrglw v2, v2, v3 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr @@ -114,27 +106,23 @@ define i64 @test4elt(<4 x float> %a) local_unnamed_addr #1 { ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 -; CHECK-P9-NEXT: xxswapd v3, vs0 ; CHECK-P9-NEXT: 
xxswapd vs0, v2 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 -; CHECK-P9-NEXT: xxswapd v4, vs0 ; CHECK-P9-NEXT: xscvspdpn f0, v2 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: vmrghh v3, v4, v3 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: xxswapd v4, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 1 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 -; CHECK-P9-NEXT: xxswapd v2, vs0 -; CHECK-P9-NEXT: vmrglh v2, v4, v2 +; CHECK-P9-NEXT: mtvsrd v2, r3 +; CHECK-P9-NEXT: vmrghh v2, v4, v2 ; CHECK-P9-NEXT: vmrglw v2, v2, v3 ; CHECK-P9-NEXT: mfvsrld r3, v2 ; CHECK-P9-NEXT: blr @@ -180,59 +168,51 @@ define <8 x i16> @test8elt(<8 x float>* nocapture readonly) local_unnamed_addr # ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: lvx v5, r3, r4 -; CHECK-P8-NEXT: xxswapd vs1, v2 +; CHECK-P8-NEXT: lvx v3, r3, r4 ; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-P8-NEXT: xxsldwi vs2, v5, v5, 3 -; CHECK-P8-NEXT: xscvspdpn f4, v5 -; CHECK-P8-NEXT: xxswapd vs3, v5 -; CHECK-P8-NEXT: xxsldwi vs5, v5, v5, 1 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 +; CHECK-P8-NEXT: xxswapd vs1, v2 +; CHECK-P8-NEXT: xscvspdpn f2, v2 +; CHECK-P8-NEXT: xxsldwi vs4, v2, v2, 1 +; CHECK-P8-NEXT: xxsldwi vs5, v3, v3, 3 +; CHECK-P8-NEXT: xscvspdpn f3, v3 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: xscvspdpn f2, vs2 -; CHECK-P8-NEXT: xscvspdpn f3, vs3 +; CHECK-P8-NEXT: xscvspdpn f1, vs1 +; CHECK-P8-NEXT: xscvspdpn f4, vs4 ; CHECK-P8-NEXT: xscvspdpn f5, vs5 -; CHECK-P8-NEXT: xscvdpsxws f4, f4 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: 
xscvdpsxws f3, f3 -; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: mffprwz r4, f4 -; CHECK-P8-NEXT: mffprwz r6, f1 -; CHECK-P8-NEXT: mffprwz r5, f0 -; CHECK-P8-NEXT: mtfprd f1, r6 -; CHECK-P8-NEXT: mtfprd f0, r5 -; CHECK-P8-NEXT: xxswapd v4, vs1 -; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 1 -; CHECK-P8-NEXT: xxswapd v3, vs0 -; CHECK-P8-NEXT: xscvspdpn f0, v2 -; CHECK-P8-NEXT: mtfprd f4, r4 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: xxswapd v1, vs4 -; CHECK-P8-NEXT: vmrglh v2, v4, v3 -; CHECK-P8-NEXT: mtfprd f2, r4 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: mffprwz r4, f5 -; CHECK-P8-NEXT: xxswapd v5, vs2 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: xxswapd v3, vs0 -; CHECK-P8-NEXT: mtfprd f1, r3 +; CHECK-P8-NEXT: xxswapd vs0, v3 +; CHECK-P8-NEXT: mffprwz r4, f1 +; CHECK-P8-NEXT: xxsldwi vs1, v3, v3, 1 +; CHECK-P8-NEXT: mtvsrd v2, r3 +; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: mffprwz r3, f2 +; CHECK-P8-NEXT: xscvdpsxws f2, f4 +; CHECK-P8-NEXT: xscvspdpn f1, vs1 +; CHECK-P8-NEXT: xscvdpsxws f4, f5 +; CHECK-P8-NEXT: mtvsrd v4, r4 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: vmrghh v2, v4, v2 +; CHECK-P8-NEXT: mffprwz r4, f2 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: mtvsrd v3, r3 ; CHECK-P8-NEXT: mffprwz r3, f3 -; CHECK-P8-NEXT: mtfprd f3, r4 -; CHECK-P8-NEXT: xxswapd v4, vs1 -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: xxswapd v6, vs3 -; CHECK-P8-NEXT: xxswapd v0, vs0 -; CHECK-P8-NEXT: vmrglh v3, v3, v4 -; CHECK-P8-NEXT: vmrglh v4, v0, v5 -; CHECK-P8-NEXT: vmrglh v5, v1, v6 +; CHECK-P8-NEXT: mtvsrd v4, r4 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: vmrghh v3, v3, v4 +; CHECK-P8-NEXT: mtvsrd v4, r3 +; CHECK-P8-NEXT: mffprwz r3, f4 +; CHECK-P8-NEXT: mtvsrd v0, r4 +; CHECK-P8-NEXT: mtvsrd v5, r3 +; CHECK-P8-NEXT: mffprwz r3, f1 +; CHECK-P8-NEXT: vmrghh v5, v0, v5 +; 
CHECK-P8-NEXT: mtvsrd v1, r3 ; CHECK-P8-NEXT: vmrglw v2, v3, v2 -; CHECK-P8-NEXT: vmrglw v3, v5, v4 +; CHECK-P8-NEXT: vmrghh v4, v4, v1 +; CHECK-P8-NEXT: vmrglw v3, v4, v5 ; CHECK-P8-NEXT: xxmrgld v2, v3, v2 ; CHECK-P8-NEXT: blr ; @@ -244,53 +224,45 @@ define <8 x i16> @test8elt(<8 x float>* nocapture readonly) local_unnamed_addr # ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: lxv vs0, 16(r3) ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 -; CHECK-P9-NEXT: xxswapd v2, vs2 ; CHECK-P9-NEXT: xxswapd vs2, vs1 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 -; CHECK-P9-NEXT: xxswapd v3, vs2 ; CHECK-P9-NEXT: xscvspdpn f2, vs1 ; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: vmrghh v2, v3, v2 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtfprd f1, r3 -; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 +; CHECK-P9-NEXT: vmrghh v3, v3, v4 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: xxswapd v3, vs2 -; CHECK-P9-NEXT: vmrglh v3, v3, v4 ; CHECK-P9-NEXT: vmrglw v2, v3, v2 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtfprd f1, r3 -; CHECK-P9-NEXT: xxswapd v3, vs1 ; CHECK-P9-NEXT: xxswapd vs1, vs0 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtfprd f1, r3 -; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xscvspdpn f1, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: 
xscvdpsxws f1, f1 +; CHECK-P9-NEXT: vmrghh v3, v4, v3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtfprd f1, r3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: xxswapd v4, vs1 -; CHECK-P9-NEXT: xxswapd v5, vs0 -; CHECK-P9-NEXT: vmrglh v4, v4, v5 +; CHECK-P9-NEXT: mtvsrd v5, r3 +; CHECK-P9-NEXT: vmrghh v4, v4, v5 ; CHECK-P9-NEXT: vmrglw v3, v4, v3 ; CHECK-P9-NEXT: xxmrgld v2, v3, v2 ; CHECK-P9-NEXT: blr @@ -363,116 +335,100 @@ define void @test16elt(<16 x i16>* noalias nocapture sret %agg.result, <16 x flo ; CHECK-P8-LABEL: test16elt: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: lvx v5, 0, r4 -; CHECK-P8-NEXT: li r6, 32 ; CHECK-P8-NEXT: li r5, 16 -; CHECK-P8-NEXT: lvx v2, r4, r6 +; CHECK-P8-NEXT: li r6, 32 ; CHECK-P8-NEXT: lvx v3, r4, r5 +; CHECK-P8-NEXT: lvx v2, r4, r6 ; CHECK-P8-NEXT: li r6, 48 -; CHECK-P8-NEXT: xscvspdpn f0, v5 -; CHECK-P8-NEXT: xxsldwi vs1, v5, v5, 3 +; CHECK-P8-NEXT: xxsldwi vs0, v5, v5, 3 +; CHECK-P8-NEXT: xscvspdpn f1, v5 ; CHECK-P8-NEXT: lvx v4, r4, r6 -; CHECK-P8-NEXT: xscvspdpn f4, v2 -; CHECK-P8-NEXT: xxsldwi vs5, v5, v5, 1 -; CHECK-P8-NEXT: xscvspdpn f2, v3 ; CHECK-P8-NEXT: xxswapd vs3, v5 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: xxswapd vs8, v3 -; CHECK-P8-NEXT: xscvspdpn f6, v4 +; CHECK-P8-NEXT: xxsldwi vs5, v5, v5, 1 ; CHECK-P8-NEXT: xxsldwi vs7, v3, v3, 3 -; CHECK-P8-NEXT: xscvspdpn f5, vs5 -; CHECK-P8-NEXT: xxsldwi vs10, v2, v2, 3 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: xxsldwi vs9, v3, v3, 1 +; CHECK-P8-NEXT: xxswapd vs8, v3 +; CHECK-P8-NEXT: xscvspdpn f0, vs0 ; CHECK-P8-NEXT: xscvspdpn f3, vs3 -; CHECK-P8-NEXT: xxsldwi vs12, v2, v2, 1 -; CHECK-P8-NEXT: xscvspdpn f8, vs8 -; CHECK-P8-NEXT: xxswapd vs11, v2 -; CHECK-P8-NEXT: xscvdpsxws f4, f4 -; CHECK-P8-NEXT: xxswapd v2, v4 -; CHECK-P8-NEXT: xscvspdpn f7, vs7 -; CHECK-P8-NEXT: xxsldwi vs13, v4, v4, 3 -; 
CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: xxsldwi v3, v4, v4, 1 -; CHECK-P8-NEXT: xscvspdpn f10, vs10 +; CHECK-P8-NEXT: xscvspdpn f5, vs5 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: xscvspdpn f9, vs9 -; CHECK-P8-NEXT: xscvdpsxws f6, f6 -; CHECK-P8-NEXT: xscvspdpn f12, vs12 -; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: xscvspdpn f11, vs11 +; CHECK-P8-NEXT: xscvspdpn f7, vs7 +; CHECK-P8-NEXT: xscvspdpn f8, vs8 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: xscvspdpn v2, v2 -; CHECK-P8-NEXT: xscvdpsxws f8, f8 -; CHECK-P8-NEXT: mtfprd f0, r4 -; CHECK-P8-NEXT: mffprwz r4, f4 -; CHECK-P8-NEXT: xscvdpsxws f7, f7 -; CHECK-P8-NEXT: mffprwz r6, f2 -; CHECK-P8-NEXT: xscvspdpn f13, vs13 -; CHECK-P8-NEXT: xscvspdpn v3, v3 -; CHECK-P8-NEXT: xscvdpsxws f10, f10 -; CHECK-P8-NEXT: mtfprd f4, r4 +; CHECK-P8-NEXT: xscvspdpn f2, v3 ; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: xscvdpsxws f9, f9 -; CHECK-P8-NEXT: mtfprd f2, r6 -; CHECK-P8-NEXT: mffprwz r6, f6 -; CHECK-P8-NEXT: xscvdpsxws f12, f12 -; CHECK-P8-NEXT: mtfprd f1, r4 +; CHECK-P8-NEXT: xscvdpsxws f1, f5 +; CHECK-P8-NEXT: mtvsrd v5, r4 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: xxsldwi vs0, v3, v3, 1 +; CHECK-P8-NEXT: xscvspdpn f4, v2 +; CHECK-P8-NEXT: xscvdpsxws f5, f7 +; CHECK-P8-NEXT: xxsldwi vs7, v4, v4, 3 +; CHECK-P8-NEXT: mtvsrd v3, r4 +; CHECK-P8-NEXT: mffprwz r4, f3 +; CHECK-P8-NEXT: xxsldwi vs3, v2, v2, 3 +; CHECK-P8-NEXT: xscvspdpn f6, v4 +; CHECK-P8-NEXT: mtvsrd v0, r4 +; CHECK-P8-NEXT: mffprwz r4, f1 +; CHECK-P8-NEXT: xscvdpsxws f1, f8 +; CHECK-P8-NEXT: xxswapd vs8, v4 +; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: mtvsrd v1, r4 ; CHECK-P8-NEXT: mffprwz r4, f5 -; CHECK-P8-NEXT: xscvdpsxws f11, f11 -; CHECK-P8-NEXT: xxswapd v4, vs1 -; CHECK-P8-NEXT: mtfprd f6, r6 -; CHECK-P8-NEXT: mffprwz r6, f3 -; CHECK-P8-NEXT: xscvdpsxws v2, v2 -; CHECK-P8-NEXT: xxswapd v9, 
vs6 -; CHECK-P8-NEXT: mtfprd f5, r4 -; CHECK-P8-NEXT: mffprwz r4, f8 -; CHECK-P8-NEXT: mtfprd f3, r6 -; CHECK-P8-NEXT: xxswapd v0, vs5 -; CHECK-P8-NEXT: mffprwz r6, f7 -; CHECK-P8-NEXT: xscvdpsxws f13, f13 -; CHECK-P8-NEXT: xxswapd v5, vs3 -; CHECK-P8-NEXT: xscvdpsxws v3, v3 -; CHECK-P8-NEXT: mtfprd f8, r4 -; CHECK-P8-NEXT: mffprwz r4, f10 -; CHECK-P8-NEXT: mtfprd f7, r6 -; CHECK-P8-NEXT: mffprwz r6, f9 -; CHECK-P8-NEXT: mtfprd f10, r4 -; CHECK-P8-NEXT: mffprwz r4, f12 -; CHECK-P8-NEXT: mtfprd f9, r6 -; CHECK-P8-NEXT: xxswapd v6, vs10 -; CHECK-P8-NEXT: mffprwz r6, f11 -; CHECK-P8-NEXT: mtfprd f12, r4 -; CHECK-P8-NEXT: xxswapd v1, vs9 -; CHECK-P8-NEXT: mfvsrwz r4, v2 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: mtfprd f11, r6 -; CHECK-P8-NEXT: mffprwz r6, f13 -; CHECK-P8-NEXT: mtfprd f0, r4 -; CHECK-P8-NEXT: xxswapd v7, vs11 -; CHECK-P8-NEXT: mfvsrwz r4, v3 -; CHECK-P8-NEXT: vmrglh v3, v5, v4 -; CHECK-P8-NEXT: xxswapd v4, vs7 -; CHECK-P8-NEXT: vmrglh v2, v2, v0 -; CHECK-P8-NEXT: xxswapd v5, vs8 -; CHECK-P8-NEXT: xxswapd v0, vs2 -; CHECK-P8-NEXT: mtfprd f13, r6 -; CHECK-P8-NEXT: mtfprd f1, r4 -; CHECK-P8-NEXT: xxswapd v8, vs0 -; CHECK-P8-NEXT: vmrglh v4, v5, v4 -; CHECK-P8-NEXT: vmrglh v5, v0, v1 -; CHECK-P8-NEXT: xxswapd v1, vs4 -; CHECK-P8-NEXT: vmrglh v0, v7, v6 -; CHECK-P8-NEXT: xxswapd v6, vs12 -; CHECK-P8-NEXT: xxswapd v7, vs13 -; CHECK-P8-NEXT: xxswapd v10, vs1 +; CHECK-P8-NEXT: xxswapd vs5, v2 +; CHECK-P8-NEXT: xscvspdpn f3, vs3 +; CHECK-P8-NEXT: xscvdpsxws f4, f4 +; CHECK-P8-NEXT: vmrghh v3, v0, v3 +; CHECK-P8-NEXT: mtvsrd v0, r4 +; CHECK-P8-NEXT: mffprwz r4, f1 +; CHECK-P8-NEXT: xscvdpsxws f6, f6 +; CHECK-P8-NEXT: xscvspdpn f1, vs5 +; CHECK-P8-NEXT: xxsldwi vs5, v2, v2, 1 +; CHECK-P8-NEXT: mtvsrd v6, r4 +; CHECK-P8-NEXT: mffprwz r4, f2 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: vmrghh v2, v5, v1 +; CHECK-P8-NEXT: vmrghh v5, v6, v0 +; CHECK-P8-NEXT: mtvsrd v0, r4 +; CHECK-P8-NEXT: mffprwz r4, f4 +; CHECK-P8-NEXT: xscvdpsxws f2, f3 +; 
CHECK-P8-NEXT: xscvspdpn f5, vs5 +; CHECK-P8-NEXT: mtvsrd v1, r4 +; CHECK-P8-NEXT: mffprwz r4, f6 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: mtvsrd v6, r4 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: xscvspdpn f7, vs7 +; CHECK-P8-NEXT: mtvsrd v7, r4 +; CHECK-P8-NEXT: mffprwz r4, f2 +; CHECK-P8-NEXT: xxsldwi vs2, v4, v4, 1 +; CHECK-P8-NEXT: xscvspdpn f8, vs8 +; CHECK-P8-NEXT: xscvdpsxws f0, f5 +; CHECK-P8-NEXT: mtvsrd v4, r4 +; CHECK-P8-NEXT: mffprwz r4, f1 +; CHECK-P8-NEXT: xscvspdpn f1, vs2 +; CHECK-P8-NEXT: xscvdpsxws f3, f7 +; CHECK-P8-NEXT: mtvsrd v8, r4 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: xscvdpsxws f0, f8 +; CHECK-P8-NEXT: mtvsrd v9, r4 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: mffprwz r4, f3 +; CHECK-P8-NEXT: vmrghh v0, v0, v7 +; CHECK-P8-NEXT: mtvsrd v7, r4 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: vmrghh v4, v8, v4 +; CHECK-P8-NEXT: mtvsrd v8, r4 +; CHECK-P8-NEXT: mffprwz r4, f1 +; CHECK-P8-NEXT: vmrghh v1, v1, v9 +; CHECK-P8-NEXT: mtvsrd v9, r4 +; CHECK-P8-NEXT: vmrghh v7, v8, v7 +; CHECK-P8-NEXT: vmrghh v6, v6, v9 ; CHECK-P8-NEXT: vmrglw v2, v2, v3 -; CHECK-P8-NEXT: vmrglh v1, v1, v6 -; CHECK-P8-NEXT: vmrglh v6, v8, v7 -; CHECK-P8-NEXT: vmrglh v7, v9, v10 -; CHECK-P8-NEXT: vmrglw v3, v5, v4 -; CHECK-P8-NEXT: vmrglw v4, v1, v0 -; CHECK-P8-NEXT: vmrglw v5, v7, v6 +; CHECK-P8-NEXT: vmrglw v3, v0, v5 +; CHECK-P8-NEXT: vmrglw v4, v1, v4 +; CHECK-P8-NEXT: vmrglw v5, v6, v7 ; CHECK-P8-NEXT: xxmrgld v2, v3, v2 ; CHECK-P8-NEXT: stvx v2, 0, r3 ; CHECK-P8-NEXT: xxmrgld v3, v5, v4 @@ -481,118 +437,102 @@ define void @test16elt(<16 x i16>* noalias nocapture sret %agg.result, <16 x flo ; ; CHECK-P9-LABEL: test16elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs1, 0(r4) -; CHECK-P9-NEXT: lxv vs3, 16(r4) -; CHECK-P9-NEXT: xscvspdpn f5, vs1 -; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-P9-NEXT: xscvspdpn f8, vs3 -; CHECK-P9-NEXT: xxswapd vs4, vs1 -; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 -; 
CHECK-P9-NEXT: xscvspdpn f4, vs4 -; CHECK-P9-NEXT: xscvdpsxws f5, f5 -; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: xscvdpsxws f8, f8 -; CHECK-P9-NEXT: xxsldwi vs6, vs3, vs3, 3 -; CHECK-P9-NEXT: xxswapd vs7, vs3 -; CHECK-P9-NEXT: xscvspdpn f6, vs6 -; CHECK-P9-NEXT: xxsldwi vs3, vs3, vs3, 1 -; CHECK-P9-NEXT: xscvspdpn f7, vs7 +; CHECK-P9-NEXT: lxv vs2, 0(r4) +; CHECK-P9-NEXT: xxsldwi vs3, vs2, vs2, 3 +; CHECK-P9-NEXT: xxswapd vs4, vs2 ; CHECK-P9-NEXT: xscvspdpn f3, vs3 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: xscvdpsxws f4, f4 -; CHECK-P9-NEXT: xscvdpsxws f6, f6 -; CHECK-P9-NEXT: mffprwz r5, f5 -; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: xscvdpsxws f7, f7 +; CHECK-P9-NEXT: xscvspdpn f4, vs4 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: mtfprd f5, r5 -; CHECK-P9-NEXT: mffprwz r5, f8 -; CHECK-P9-NEXT: mtfprd f8, r5 -; CHECK-P9-NEXT: mffprwz r5, f2 -; CHECK-P9-NEXT: lxv vs0, 32(r4) -; CHECK-P9-NEXT: xxsldwi vs9, vs0, vs0, 3 -; CHECK-P9-NEXT: xxswapd vs10, vs0 -; CHECK-P9-NEXT: xscvspdpn f9, vs9 -; CHECK-P9-NEXT: xscvspdpn f10, vs10 -; CHECK-P9-NEXT: xscvdpsxws f9, f9 -; CHECK-P9-NEXT: xscvdpsxws f10, f10 -; CHECK-P9-NEXT: mtfprd f2, r5 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: xscvspdpn f5, vs2 +; CHECK-P9-NEXT: xxsldwi vs2, vs2, vs2, 1 +; CHECK-P9-NEXT: xscvspdpn f2, vs2 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: mffprwz r5, f3 +; CHECK-P9-NEXT: lxv vs1, 16(r4) +; CHECK-P9-NEXT: xxsldwi vs6, vs1, vs1, 3 +; CHECK-P9-NEXT: xxswapd vs3, vs1 +; CHECK-P9-NEXT: mtvsrd v2, r5 ; CHECK-P9-NEXT: mffprwz r5, f4 -; CHECK-P9-NEXT: mtfprd f4, r5 +; CHECK-P9-NEXT: xscvdpsxws f4, f5 +; CHECK-P9-NEXT: xscvspdpn f3, vs3 +; CHECK-P9-NEXT: mtvsrd v3, r5 +; CHECK-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-P9-NEXT: mffprwz r5, f4 +; CHECK-P9-NEXT: xscvspdpn f4, vs6 +; CHECK-P9-NEXT: mtvsrd v3, r5 +; CHECK-P9-NEXT: mffprwz r5, f2 +; CHECK-P9-NEXT: xscvspdpn f2, vs1 +; CHECK-P9-NEXT: xxsldwi vs1, vs1, 
vs1, 1 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: lxv vs0, 32(r4) +; CHECK-P9-NEXT: mtvsrd v4, r5 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: vmrghh v3, v3, v4 +; CHECK-P9-NEXT: vmrglw v2, v3, v2 +; CHECK-P9-NEXT: mffprwz r5, f4 +; CHECK-P9-NEXT: xscvspdpn f1, vs1 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: mtvsrd v4, r5 +; CHECK-P9-NEXT: mffprwz r5, f3 +; CHECK-P9-NEXT: xxsldwi vs3, vs0, vs0, 3 +; CHECK-P9-NEXT: mtvsrd v5, r5 +; CHECK-P9-NEXT: mffprwz r5, f2 +; CHECK-P9-NEXT: xscvspdpn f2, vs3 +; CHECK-P9-NEXT: vmrghh v4, v5, v4 +; CHECK-P9-NEXT: mtvsrd v5, r5 ; CHECK-P9-NEXT: mffprwz r5, f1 -; CHECK-P9-NEXT: mtfprd f1, r5 -; CHECK-P9-NEXT: mffprwz r5, f6 -; CHECK-P9-NEXT: xxswapd v2, vs2 -; CHECK-P9-NEXT: xxswapd v3, vs4 +; CHECK-P9-NEXT: xxswapd vs1, vs0 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: mtvsrd v0, r5 +; CHECK-P9-NEXT: xscvspdpn f1, vs1 +; CHECK-P9-NEXT: vmrghh v5, v5, v0 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: vmrglw v3, v5, v4 +; CHECK-P9-NEXT: mffprwz r5, f2 ; CHECK-P9-NEXT: xscvspdpn f2, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 +; CHECK-P9-NEXT: mtvsrd v0, r5 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mtfprd f6, r5 -; CHECK-P9-NEXT: mffprwz r5, f7 -; CHECK-P9-NEXT: xxswapd v4, vs1 +; CHECK-P9-NEXT: mffprwz r5, f1 ; CHECK-P9-NEXT: lxv vs1, 48(r4) -; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: xxswapd v3, vs5 -; CHECK-P9-NEXT: mtfprd f7, r5 -; CHECK-P9-NEXT: mffprwz r5, f3 -; CHECK-P9-NEXT: vmrglh v3, v3, v4 -; CHECK-P9-NEXT: xxswapd v4, vs6 -; CHECK-P9-NEXT: xxswapd v5, vs7 -; CHECK-P9-NEXT: mtfprd f3, r5 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: xxswapd v0, vs3 -; CHECK-P9-NEXT: vmrglh v4, v5, v4 -; CHECK-P9-NEXT: xxswapd v5, vs8 -; CHECK-P9-NEXT: vmrglh v5, v5, v0 +; CHECK-P9-NEXT: mtvsrd v1, r5 +; CHECK-P9-NEXT: vmrghh v0, v1, v0 ; CHECK-P9-NEXT: mffprwz r4, f2 -; CHECK-P9-NEXT: mtfprd f2, 
r4 -; CHECK-P9-NEXT: mffprwz r4, f0 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: mtfprd f0, r4 -; CHECK-P9-NEXT: vmrglw v3, v5, v4 -; CHECK-P9-NEXT: xxswapd v4, vs2 ; CHECK-P9-NEXT: xxmrgld vs2, v3, v2 -; CHECK-P9-NEXT: xxswapd v2, vs0 +; CHECK-P9-NEXT: mtvsrd v4, r4 +; CHECK-P9-NEXT: mffprwz r4, f0 ; CHECK-P9-NEXT: xxsldwi vs0, vs1, vs1, 3 +; CHECK-P9-NEXT: mtvsrd v2, r4 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 +; CHECK-P9-NEXT: vmrghh v2, v4, v2 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r4, f0 -; CHECK-P9-NEXT: mtfprd f0, r4 -; CHECK-P9-NEXT: xxswapd v3, vs0 -; CHECK-P9-NEXT: xxswapd vs0, vs1 -; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r4, f0 -; CHECK-P9-NEXT: mtfprd f0, r4 -; CHECK-P9-NEXT: vmrglh v2, v4, v2 -; CHECK-P9-NEXT: xxswapd v4, vs0 -; CHECK-P9-NEXT: xscvspdpn f0, vs1 -; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r4, f0 -; CHECK-P9-NEXT: mtfprd f0, r4 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: xxswapd v4, vs0 -; CHECK-P9-NEXT: xxsldwi vs0, vs1, vs1, 1 -; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r5, f9 -; CHECK-P9-NEXT: mtfprd f9, r5 -; CHECK-P9-NEXT: mffprwz r5, f10 -; CHECK-P9-NEXT: mtfprd f10, r5 -; CHECK-P9-NEXT: xxswapd v0, vs9 -; CHECK-P9-NEXT: xxswapd v1, vs10 -; CHECK-P9-NEXT: vmrglh v0, v1, v0 ; CHECK-P9-NEXT: vmrglw v2, v2, v0 -; CHECK-P9-NEXT: stxv vs2, 0(r3) ; CHECK-P9-NEXT: mffprwz r4, f0 -; CHECK-P9-NEXT: mtfprd f0, r4 -; CHECK-P9-NEXT: xxswapd v5, vs0 -; CHECK-P9-NEXT: vmrglh v4, v4, v5 +; CHECK-P9-NEXT: xxswapd vs0, vs1 +; CHECK-P9-NEXT: mtvsrd v3, r4 +; CHECK-P9-NEXT: xscvspdpn f0, vs0 +; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: xscvspdpn f0, vs1 +; CHECK-P9-NEXT: mtvsrd v4, r4 +; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: vmrghh v3, v4, v3 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: xxsldwi vs0, vs1, vs1, 1 +; CHECK-P9-NEXT: 
mtvsrd v4, r4 +; CHECK-P9-NEXT: xscvspdpn f0, vs0 +; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: mtvsrd v5, r4 +; CHECK-P9-NEXT: vmrghh v4, v4, v5 ; CHECK-P9-NEXT: vmrglw v3, v4, v3 ; CHECK-P9-NEXT: xxmrgld vs0, v3, v2 ; CHECK-P9-NEXT: stxv vs0, 16(r3) +; CHECK-P9-NEXT: stxv vs2, 0(r3) ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test16elt: @@ -728,12 +668,10 @@ define i32 @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 { ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: mtvsrd v3, r4 ; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: mtfprd f1, r4 -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: xxswapd v3, vs1 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: vmrglh v2, v3, v2 +; CHECK-P8-NEXT: mtvsrd v2, r3 +; CHECK-P8-NEXT: vmrghh v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mffprwz r3, f0 ; CHECK-P8-NEXT: blr @@ -748,13 +686,11 @@ define i32 @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 { ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtfprd f1, r3 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 -; CHECK-P9-NEXT: xxswapd v2, vs1 -; CHECK-P9-NEXT: xxswapd v3, vs0 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: li r3, 0 +; CHECK-P9-NEXT: vmrghh v2, v3, v2 ; CHECK-P9-NEXT: vextuwrx r3, r3, v2 ; CHECK-P9-NEXT: blr ; @@ -798,20 +734,16 @@ define i64 @test4elt_signed(<4 x float> %a) local_unnamed_addr #1 { ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: mtfprd f1, r3 +; CHECK-P8-NEXT: mtvsrd v2, r3 ; CHECK-P8-NEXT: mffprwz r3, f0 ; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: xxswapd v4, vs1 -; CHECK-P8-NEXT: mtfprd f0, r3 +; CHECK-P8-NEXT: mtvsrd v3, r3 ; CHECK-P8-NEXT: mffprwz r3, f3 -; 
CHECK-P8-NEXT: mtfprd f2, r4 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: mtfprd f3, r3 -; CHECK-P8-NEXT: xxswapd v3, vs2 -; CHECK-P8-NEXT: xxswapd v5, vs3 -; CHECK-P8-NEXT: vmrglh v2, v3, v2 -; CHECK-P8-NEXT: vmrglh v3, v4, v5 -; CHECK-P8-NEXT: vmrglw v2, v3, v2 +; CHECK-P8-NEXT: mtvsrd v4, r4 +; CHECK-P8-NEXT: mtvsrd v5, r3 +; CHECK-P8-NEXT: vmrghh v3, v4, v3 +; CHECK-P8-NEXT: vmrghh v2, v2, v5 +; CHECK-P8-NEXT: vmrglw v2, v2, v3 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr @@ -822,27 +754,23 @@ define i64 @test4elt_signed(<4 x float> %a) local_unnamed_addr #1 { ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 -; CHECK-P9-NEXT: xxswapd v3, vs0 ; CHECK-P9-NEXT: xxswapd vs0, v2 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 -; CHECK-P9-NEXT: xxswapd v4, vs0 ; CHECK-P9-NEXT: xscvspdpn f0, v2 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: vmrghh v3, v4, v3 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: xxswapd v4, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 1 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 -; CHECK-P9-NEXT: xxswapd v2, vs0 -; CHECK-P9-NEXT: vmrglh v2, v4, v2 +; CHECK-P9-NEXT: mtvsrd v2, r3 +; CHECK-P9-NEXT: vmrghh v2, v4, v2 ; CHECK-P9-NEXT: vmrglw v2, v2, v3 ; CHECK-P9-NEXT: mfvsrld r3, v2 ; CHECK-P9-NEXT: blr @@ -888,59 +816,51 @@ define <8 x i16> @test8elt_signed(<8 x float>* nocapture readonly) local_unnamed ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: lvx v5, r3, r4 -; CHECK-P8-NEXT: xxswapd vs1, v2 +; CHECK-P8-NEXT: 
lvx v3, r3, r4 ; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-P8-NEXT: xxsldwi vs2, v5, v5, 3 -; CHECK-P8-NEXT: xscvspdpn f4, v5 -; CHECK-P8-NEXT: xxswapd vs3, v5 -; CHECK-P8-NEXT: xxsldwi vs5, v5, v5, 1 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 +; CHECK-P8-NEXT: xxswapd vs1, v2 +; CHECK-P8-NEXT: xscvspdpn f2, v2 +; CHECK-P8-NEXT: xxsldwi vs4, v2, v2, 1 +; CHECK-P8-NEXT: xxsldwi vs5, v3, v3, 3 +; CHECK-P8-NEXT: xscvspdpn f3, v3 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: xscvspdpn f2, vs2 -; CHECK-P8-NEXT: xscvspdpn f3, vs3 +; CHECK-P8-NEXT: xscvspdpn f1, vs1 +; CHECK-P8-NEXT: xscvspdpn f4, vs4 ; CHECK-P8-NEXT: xscvspdpn f5, vs5 -; CHECK-P8-NEXT: xscvdpsxws f4, f4 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: mffprwz r4, f4 -; CHECK-P8-NEXT: mffprwz r6, f1 -; CHECK-P8-NEXT: mffprwz r5, f0 -; CHECK-P8-NEXT: mtfprd f1, r6 -; CHECK-P8-NEXT: mtfprd f0, r5 -; CHECK-P8-NEXT: xxswapd v4, vs1 -; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 1 -; CHECK-P8-NEXT: xxswapd v3, vs0 -; CHECK-P8-NEXT: xscvspdpn f0, v2 -; CHECK-P8-NEXT: mtfprd f4, r4 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: xxswapd v1, vs4 -; CHECK-P8-NEXT: vmrglh v2, v4, v3 -; CHECK-P8-NEXT: mtfprd f2, r4 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: mffprwz r4, f5 -; CHECK-P8-NEXT: xxswapd v5, vs2 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: xxswapd v3, vs0 -; CHECK-P8-NEXT: mtfprd f1, r3 +; CHECK-P8-NEXT: xxswapd vs0, v3 +; CHECK-P8-NEXT: mffprwz r4, f1 +; CHECK-P8-NEXT: xxsldwi vs1, v3, v3, 1 +; CHECK-P8-NEXT: mtvsrd v2, r3 +; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: mffprwz r3, f2 +; CHECK-P8-NEXT: xscvdpsxws f2, f4 +; CHECK-P8-NEXT: xscvspdpn f1, vs1 +; CHECK-P8-NEXT: xscvdpsxws f4, f5 +; 
CHECK-P8-NEXT: mtvsrd v4, r4 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: vmrghh v2, v4, v2 +; CHECK-P8-NEXT: mffprwz r4, f2 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: mtvsrd v3, r3 ; CHECK-P8-NEXT: mffprwz r3, f3 -; CHECK-P8-NEXT: mtfprd f3, r4 -; CHECK-P8-NEXT: xxswapd v4, vs1 -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: xxswapd v6, vs3 -; CHECK-P8-NEXT: xxswapd v0, vs0 -; CHECK-P8-NEXT: vmrglh v3, v3, v4 -; CHECK-P8-NEXT: vmrglh v4, v0, v5 -; CHECK-P8-NEXT: vmrglh v5, v1, v6 +; CHECK-P8-NEXT: mtvsrd v4, r4 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: vmrghh v3, v3, v4 +; CHECK-P8-NEXT: mtvsrd v4, r3 +; CHECK-P8-NEXT: mffprwz r3, f4 +; CHECK-P8-NEXT: mtvsrd v0, r4 +; CHECK-P8-NEXT: mtvsrd v5, r3 +; CHECK-P8-NEXT: mffprwz r3, f1 +; CHECK-P8-NEXT: vmrghh v5, v0, v5 +; CHECK-P8-NEXT: mtvsrd v1, r3 ; CHECK-P8-NEXT: vmrglw v2, v3, v2 -; CHECK-P8-NEXT: vmrglw v3, v5, v4 +; CHECK-P8-NEXT: vmrghh v4, v4, v1 +; CHECK-P8-NEXT: vmrglw v3, v4, v5 ; CHECK-P8-NEXT: xxmrgld v2, v3, v2 ; CHECK-P8-NEXT: blr ; @@ -952,53 +872,45 @@ define <8 x i16> @test8elt_signed(<8 x float>* nocapture readonly) local_unnamed ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: lxv vs0, 16(r3) ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 -; CHECK-P9-NEXT: xxswapd v2, vs2 ; CHECK-P9-NEXT: xxswapd vs2, vs1 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 -; CHECK-P9-NEXT: xxswapd v3, vs2 ; CHECK-P9-NEXT: xscvspdpn f2, vs1 ; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: vmrghh v2, v3, v2 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtfprd f1, r3 -; CHECK-P9-NEXT: xxswapd v4, vs1 ; 
CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 +; CHECK-P9-NEXT: vmrghh v3, v3, v4 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: xxswapd v3, vs2 -; CHECK-P9-NEXT: vmrglh v3, v3, v4 ; CHECK-P9-NEXT: vmrglw v2, v3, v2 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtfprd f1, r3 -; CHECK-P9-NEXT: xxswapd v3, vs1 ; CHECK-P9-NEXT: xxswapd vs1, vs0 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtfprd f1, r3 -; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xscvspdpn f1, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: vmrghh v3, v4, v3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtfprd f1, r3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: xxswapd v4, vs1 -; CHECK-P9-NEXT: xxswapd v5, vs0 -; CHECK-P9-NEXT: vmrglh v4, v4, v5 +; CHECK-P9-NEXT: mtvsrd v5, r3 +; CHECK-P9-NEXT: vmrghh v4, v4, v5 ; CHECK-P9-NEXT: vmrglw v3, v4, v3 ; CHECK-P9-NEXT: xxmrgld v2, v3, v2 ; CHECK-P9-NEXT: blr @@ -1071,116 +983,100 @@ define void @test16elt_signed(<16 x i16>* noalias nocapture sret %agg.result, <1 ; CHECK-P8-LABEL: test16elt_signed: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: lvx v5, 0, r4 -; CHECK-P8-NEXT: li r6, 32 ; CHECK-P8-NEXT: li r5, 16 -; CHECK-P8-NEXT: lvx v2, r4, r6 +; CHECK-P8-NEXT: li r6, 32 ; CHECK-P8-NEXT: lvx v3, r4, r5 +; CHECK-P8-NEXT: lvx v2, r4, r6 ; CHECK-P8-NEXT: li r6, 48 -; CHECK-P8-NEXT: xscvspdpn f0, v5 -; CHECK-P8-NEXT: xxsldwi vs1, v5, v5, 3 +; CHECK-P8-NEXT: xxsldwi vs0, v5, v5, 3 +; CHECK-P8-NEXT: xscvspdpn f1, v5 ; CHECK-P8-NEXT: lvx v4, r4, r6 -; CHECK-P8-NEXT: xscvspdpn f4, v2 -; 
CHECK-P8-NEXT: xxsldwi vs5, v5, v5, 1 -; CHECK-P8-NEXT: xscvspdpn f2, v3 ; CHECK-P8-NEXT: xxswapd vs3, v5 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: xxswapd vs8, v3 -; CHECK-P8-NEXT: xscvspdpn f6, v4 +; CHECK-P8-NEXT: xxsldwi vs5, v5, v5, 1 ; CHECK-P8-NEXT: xxsldwi vs7, v3, v3, 3 -; CHECK-P8-NEXT: xscvspdpn f5, vs5 -; CHECK-P8-NEXT: xxsldwi vs10, v2, v2, 3 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: xxsldwi vs9, v3, v3, 1 +; CHECK-P8-NEXT: xxswapd vs8, v3 +; CHECK-P8-NEXT: xscvspdpn f0, vs0 ; CHECK-P8-NEXT: xscvspdpn f3, vs3 -; CHECK-P8-NEXT: xxsldwi vs12, v2, v2, 1 -; CHECK-P8-NEXT: xscvspdpn f8, vs8 -; CHECK-P8-NEXT: xxswapd vs11, v2 -; CHECK-P8-NEXT: xscvdpsxws f4, f4 -; CHECK-P8-NEXT: xxswapd v2, v4 -; CHECK-P8-NEXT: xscvspdpn f7, vs7 -; CHECK-P8-NEXT: xxsldwi vs13, v4, v4, 3 -; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: xxsldwi v3, v4, v4, 1 -; CHECK-P8-NEXT: xscvspdpn f10, vs10 +; CHECK-P8-NEXT: xscvspdpn f5, vs5 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: xscvspdpn f9, vs9 -; CHECK-P8-NEXT: xscvdpsxws f6, f6 -; CHECK-P8-NEXT: xscvspdpn f12, vs12 -; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: xscvspdpn f11, vs11 +; CHECK-P8-NEXT: xscvspdpn f7, vs7 +; CHECK-P8-NEXT: xscvspdpn f8, vs8 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: xscvspdpn v2, v2 -; CHECK-P8-NEXT: xscvdpsxws f8, f8 -; CHECK-P8-NEXT: mtfprd f0, r4 -; CHECK-P8-NEXT: mffprwz r4, f4 -; CHECK-P8-NEXT: xscvdpsxws f7, f7 -; CHECK-P8-NEXT: mffprwz r6, f2 -; CHECK-P8-NEXT: xscvspdpn f13, vs13 -; CHECK-P8-NEXT: xscvspdpn v3, v3 -; CHECK-P8-NEXT: xscvdpsxws f10, f10 -; CHECK-P8-NEXT: mtfprd f4, r4 +; CHECK-P8-NEXT: xscvspdpn f2, v3 ; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: xscvdpsxws f9, f9 -; CHECK-P8-NEXT: mtfprd f2, r6 -; CHECK-P8-NEXT: mffprwz r6, f6 -; CHECK-P8-NEXT: xscvdpsxws f12, f12 -; CHECK-P8-NEXT: mtfprd f1, r4 +; CHECK-P8-NEXT: xscvdpsxws f1, f5 +; CHECK-P8-NEXT: 
mtvsrd v5, r4 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: xxsldwi vs0, v3, v3, 1 +; CHECK-P8-NEXT: xscvspdpn f4, v2 +; CHECK-P8-NEXT: xscvdpsxws f5, f7 +; CHECK-P8-NEXT: xxsldwi vs7, v4, v4, 3 +; CHECK-P8-NEXT: mtvsrd v3, r4 +; CHECK-P8-NEXT: mffprwz r4, f3 +; CHECK-P8-NEXT: xxsldwi vs3, v2, v2, 3 +; CHECK-P8-NEXT: xscvspdpn f6, v4 +; CHECK-P8-NEXT: mtvsrd v0, r4 +; CHECK-P8-NEXT: mffprwz r4, f1 +; CHECK-P8-NEXT: xscvdpsxws f1, f8 +; CHECK-P8-NEXT: xxswapd vs8, v4 +; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: mtvsrd v1, r4 ; CHECK-P8-NEXT: mffprwz r4, f5 -; CHECK-P8-NEXT: xscvdpsxws f11, f11 -; CHECK-P8-NEXT: xxswapd v4, vs1 -; CHECK-P8-NEXT: mtfprd f6, r6 -; CHECK-P8-NEXT: mffprwz r6, f3 -; CHECK-P8-NEXT: xscvdpsxws v2, v2 -; CHECK-P8-NEXT: xxswapd v9, vs6 -; CHECK-P8-NEXT: mtfprd f5, r4 -; CHECK-P8-NEXT: mffprwz r4, f8 -; CHECK-P8-NEXT: mtfprd f3, r6 -; CHECK-P8-NEXT: xxswapd v0, vs5 -; CHECK-P8-NEXT: mffprwz r6, f7 -; CHECK-P8-NEXT: xscvdpsxws f13, f13 -; CHECK-P8-NEXT: xxswapd v5, vs3 -; CHECK-P8-NEXT: xscvdpsxws v3, v3 -; CHECK-P8-NEXT: mtfprd f8, r4 -; CHECK-P8-NEXT: mffprwz r4, f10 -; CHECK-P8-NEXT: mtfprd f7, r6 -; CHECK-P8-NEXT: mffprwz r6, f9 -; CHECK-P8-NEXT: mtfprd f10, r4 -; CHECK-P8-NEXT: mffprwz r4, f12 -; CHECK-P8-NEXT: mtfprd f9, r6 -; CHECK-P8-NEXT: xxswapd v6, vs10 -; CHECK-P8-NEXT: mffprwz r6, f11 -; CHECK-P8-NEXT: mtfprd f12, r4 -; CHECK-P8-NEXT: xxswapd v1, vs9 -; CHECK-P8-NEXT: mfvsrwz r4, v2 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: mtfprd f11, r6 -; CHECK-P8-NEXT: mffprwz r6, f13 -; CHECK-P8-NEXT: mtfprd f0, r4 -; CHECK-P8-NEXT: xxswapd v7, vs11 -; CHECK-P8-NEXT: mfvsrwz r4, v3 -; CHECK-P8-NEXT: vmrglh v3, v5, v4 -; CHECK-P8-NEXT: xxswapd v4, vs7 -; CHECK-P8-NEXT: vmrglh v2, v2, v0 -; CHECK-P8-NEXT: xxswapd v5, vs8 -; CHECK-P8-NEXT: xxswapd v0, vs2 -; CHECK-P8-NEXT: mtfprd f13, r6 -; CHECK-P8-NEXT: mtfprd f1, r4 -; CHECK-P8-NEXT: xxswapd v8, vs0 -; CHECK-P8-NEXT: vmrglh v4, v5, v4 
-; CHECK-P8-NEXT: vmrglh v5, v0, v1 -; CHECK-P8-NEXT: xxswapd v1, vs4 -; CHECK-P8-NEXT: vmrglh v0, v7, v6 -; CHECK-P8-NEXT: xxswapd v6, vs12 -; CHECK-P8-NEXT: xxswapd v7, vs13 -; CHECK-P8-NEXT: xxswapd v10, vs1 +; CHECK-P8-NEXT: xxswapd vs5, v2 +; CHECK-P8-NEXT: xscvspdpn f3, vs3 +; CHECK-P8-NEXT: xscvdpsxws f4, f4 +; CHECK-P8-NEXT: vmrghh v3, v0, v3 +; CHECK-P8-NEXT: mtvsrd v0, r4 +; CHECK-P8-NEXT: mffprwz r4, f1 +; CHECK-P8-NEXT: xscvdpsxws f6, f6 +; CHECK-P8-NEXT: xscvspdpn f1, vs5 +; CHECK-P8-NEXT: xxsldwi vs5, v2, v2, 1 +; CHECK-P8-NEXT: mtvsrd v6, r4 +; CHECK-P8-NEXT: mffprwz r4, f2 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: vmrghh v2, v5, v1 +; CHECK-P8-NEXT: vmrghh v5, v6, v0 +; CHECK-P8-NEXT: mtvsrd v0, r4 +; CHECK-P8-NEXT: mffprwz r4, f4 +; CHECK-P8-NEXT: xscvdpsxws f2, f3 +; CHECK-P8-NEXT: xscvspdpn f5, vs5 +; CHECK-P8-NEXT: mtvsrd v1, r4 +; CHECK-P8-NEXT: mffprwz r4, f6 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: mtvsrd v6, r4 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: xscvspdpn f7, vs7 +; CHECK-P8-NEXT: mtvsrd v7, r4 +; CHECK-P8-NEXT: mffprwz r4, f2 +; CHECK-P8-NEXT: xxsldwi vs2, v4, v4, 1 +; CHECK-P8-NEXT: xscvspdpn f8, vs8 +; CHECK-P8-NEXT: xscvdpsxws f0, f5 +; CHECK-P8-NEXT: mtvsrd v4, r4 +; CHECK-P8-NEXT: mffprwz r4, f1 +; CHECK-P8-NEXT: xscvspdpn f1, vs2 +; CHECK-P8-NEXT: xscvdpsxws f3, f7 +; CHECK-P8-NEXT: mtvsrd v8, r4 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: xscvdpsxws f0, f8 +; CHECK-P8-NEXT: mtvsrd v9, r4 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: mffprwz r4, f3 +; CHECK-P8-NEXT: vmrghh v0, v0, v7 +; CHECK-P8-NEXT: mtvsrd v7, r4 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: vmrghh v4, v8, v4 +; CHECK-P8-NEXT: mtvsrd v8, r4 +; CHECK-P8-NEXT: mffprwz r4, f1 +; CHECK-P8-NEXT: vmrghh v1, v1, v9 +; CHECK-P8-NEXT: mtvsrd v9, r4 +; CHECK-P8-NEXT: vmrghh v7, v8, v7 +; CHECK-P8-NEXT: vmrghh v6, v6, v9 ; CHECK-P8-NEXT: vmrglw v2, v2, v3 -; CHECK-P8-NEXT: vmrglh v1, v1, v6 -; CHECK-P8-NEXT: vmrglh 
v6, v8, v7 -; CHECK-P8-NEXT: vmrglh v7, v9, v10 -; CHECK-P8-NEXT: vmrglw v3, v5, v4 -; CHECK-P8-NEXT: vmrglw v4, v1, v0 -; CHECK-P8-NEXT: vmrglw v5, v7, v6 +; CHECK-P8-NEXT: vmrglw v3, v0, v5 +; CHECK-P8-NEXT: vmrglw v4, v1, v4 +; CHECK-P8-NEXT: vmrglw v5, v6, v7 ; CHECK-P8-NEXT: xxmrgld v2, v3, v2 ; CHECK-P8-NEXT: stvx v2, 0, r3 ; CHECK-P8-NEXT: xxmrgld v3, v5, v4 @@ -1189,118 +1085,102 @@ define void @test16elt_signed(<16 x i16>* noalias nocapture sret %agg.result, <1 ; ; CHECK-P9-LABEL: test16elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs1, 0(r4) -; CHECK-P9-NEXT: lxv vs3, 16(r4) -; CHECK-P9-NEXT: xscvspdpn f5, vs1 -; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-P9-NEXT: xscvspdpn f8, vs3 -; CHECK-P9-NEXT: xxswapd vs4, vs1 -; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-P9-NEXT: xscvspdpn f4, vs4 -; CHECK-P9-NEXT: xscvdpsxws f5, f5 -; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: xscvdpsxws f8, f8 -; CHECK-P9-NEXT: xxsldwi vs6, vs3, vs3, 3 -; CHECK-P9-NEXT: xxswapd vs7, vs3 -; CHECK-P9-NEXT: xscvspdpn f6, vs6 -; CHECK-P9-NEXT: xxsldwi vs3, vs3, vs3, 1 -; CHECK-P9-NEXT: xscvspdpn f7, vs7 +; CHECK-P9-NEXT: lxv vs2, 0(r4) +; CHECK-P9-NEXT: xxsldwi vs3, vs2, vs2, 3 +; CHECK-P9-NEXT: xxswapd vs4, vs2 ; CHECK-P9-NEXT: xscvspdpn f3, vs3 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: xscvdpsxws f4, f4 -; CHECK-P9-NEXT: xscvdpsxws f6, f6 -; CHECK-P9-NEXT: mffprwz r5, f5 -; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: xscvdpsxws f7, f7 +; CHECK-P9-NEXT: xscvspdpn f4, vs4 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: mtfprd f5, r5 -; CHECK-P9-NEXT: mffprwz r5, f8 -; CHECK-P9-NEXT: mtfprd f8, r5 -; CHECK-P9-NEXT: mffprwz r5, f2 -; CHECK-P9-NEXT: lxv vs0, 32(r4) -; CHECK-P9-NEXT: xxsldwi vs9, vs0, vs0, 3 -; CHECK-P9-NEXT: xxswapd vs10, vs0 -; CHECK-P9-NEXT: xscvspdpn f9, vs9 -; CHECK-P9-NEXT: xscvspdpn f10, vs10 -; CHECK-P9-NEXT: xscvdpsxws f9, f9 -; CHECK-P9-NEXT: xscvdpsxws f10, f10 -; 
CHECK-P9-NEXT: mtfprd f2, r5 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: xscvspdpn f5, vs2 +; CHECK-P9-NEXT: xxsldwi vs2, vs2, vs2, 1 +; CHECK-P9-NEXT: xscvspdpn f2, vs2 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: mffprwz r5, f3 +; CHECK-P9-NEXT: lxv vs1, 16(r4) +; CHECK-P9-NEXT: xxsldwi vs6, vs1, vs1, 3 +; CHECK-P9-NEXT: xxswapd vs3, vs1 +; CHECK-P9-NEXT: mtvsrd v2, r5 ; CHECK-P9-NEXT: mffprwz r5, f4 -; CHECK-P9-NEXT: mtfprd f4, r5 +; CHECK-P9-NEXT: xscvdpsxws f4, f5 +; CHECK-P9-NEXT: xscvspdpn f3, vs3 +; CHECK-P9-NEXT: mtvsrd v3, r5 +; CHECK-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-P9-NEXT: mffprwz r5, f4 +; CHECK-P9-NEXT: xscvspdpn f4, vs6 +; CHECK-P9-NEXT: mtvsrd v3, r5 +; CHECK-P9-NEXT: mffprwz r5, f2 +; CHECK-P9-NEXT: xscvspdpn f2, vs1 +; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: lxv vs0, 32(r4) +; CHECK-P9-NEXT: mtvsrd v4, r5 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: vmrghh v3, v3, v4 +; CHECK-P9-NEXT: vmrglw v2, v3, v2 +; CHECK-P9-NEXT: mffprwz r5, f4 +; CHECK-P9-NEXT: xscvspdpn f1, vs1 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: mtvsrd v4, r5 +; CHECK-P9-NEXT: mffprwz r5, f3 +; CHECK-P9-NEXT: xxsldwi vs3, vs0, vs0, 3 +; CHECK-P9-NEXT: mtvsrd v5, r5 +; CHECK-P9-NEXT: mffprwz r5, f2 +; CHECK-P9-NEXT: xscvspdpn f2, vs3 +; CHECK-P9-NEXT: vmrghh v4, v5, v4 +; CHECK-P9-NEXT: mtvsrd v5, r5 ; CHECK-P9-NEXT: mffprwz r5, f1 -; CHECK-P9-NEXT: mtfprd f1, r5 -; CHECK-P9-NEXT: mffprwz r5, f6 -; CHECK-P9-NEXT: xxswapd v2, vs2 -; CHECK-P9-NEXT: xxswapd v3, vs4 +; CHECK-P9-NEXT: xxswapd vs1, vs0 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: mtvsrd v0, r5 +; CHECK-P9-NEXT: xscvspdpn f1, vs1 +; CHECK-P9-NEXT: vmrghh v5, v5, v0 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: vmrglw v3, v5, v4 +; CHECK-P9-NEXT: mffprwz r5, f2 ; CHECK-P9-NEXT: xscvspdpn f2, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 +; CHECK-P9-NEXT: mtvsrd v0, r5 ; 
CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mtfprd f6, r5 -; CHECK-P9-NEXT: mffprwz r5, f7 -; CHECK-P9-NEXT: xxswapd v4, vs1 +; CHECK-P9-NEXT: mffprwz r5, f1 ; CHECK-P9-NEXT: lxv vs1, 48(r4) -; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: xxswapd v3, vs5 -; CHECK-P9-NEXT: mtfprd f7, r5 -; CHECK-P9-NEXT: mffprwz r5, f3 -; CHECK-P9-NEXT: vmrglh v3, v3, v4 -; CHECK-P9-NEXT: xxswapd v4, vs6 -; CHECK-P9-NEXT: xxswapd v5, vs7 -; CHECK-P9-NEXT: mtfprd f3, r5 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: xxswapd v0, vs3 -; CHECK-P9-NEXT: vmrglh v4, v5, v4 -; CHECK-P9-NEXT: xxswapd v5, vs8 -; CHECK-P9-NEXT: vmrglh v5, v5, v0 +; CHECK-P9-NEXT: mtvsrd v1, r5 +; CHECK-P9-NEXT: vmrghh v0, v1, v0 ; CHECK-P9-NEXT: mffprwz r4, f2 -; CHECK-P9-NEXT: mtfprd f2, r4 -; CHECK-P9-NEXT: mffprwz r4, f0 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: mtfprd f0, r4 -; CHECK-P9-NEXT: vmrglw v3, v5, v4 -; CHECK-P9-NEXT: xxswapd v4, vs2 ; CHECK-P9-NEXT: xxmrgld vs2, v3, v2 -; CHECK-P9-NEXT: xxswapd v2, vs0 +; CHECK-P9-NEXT: mtvsrd v4, r4 +; CHECK-P9-NEXT: mffprwz r4, f0 ; CHECK-P9-NEXT: xxsldwi vs0, vs1, vs1, 3 +; CHECK-P9-NEXT: mtvsrd v2, r4 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 +; CHECK-P9-NEXT: vmrghh v2, v4, v2 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r4, f0 -; CHECK-P9-NEXT: mtfprd f0, r4 -; CHECK-P9-NEXT: xxswapd v3, vs0 -; CHECK-P9-NEXT: xxswapd vs0, vs1 -; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r4, f0 -; CHECK-P9-NEXT: mtfprd f0, r4 -; CHECK-P9-NEXT: vmrglh v2, v4, v2 -; CHECK-P9-NEXT: xxswapd v4, vs0 -; CHECK-P9-NEXT: xscvspdpn f0, vs1 -; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r4, f0 -; CHECK-P9-NEXT: mtfprd f0, r4 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: xxswapd v4, vs0 -; CHECK-P9-NEXT: xxsldwi vs0, vs1, vs1, 1 -; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r5, f9 -; 
CHECK-P9-NEXT: mtfprd f9, r5 -; CHECK-P9-NEXT: mffprwz r5, f10 -; CHECK-P9-NEXT: mtfprd f10, r5 -; CHECK-P9-NEXT: xxswapd v0, vs9 -; CHECK-P9-NEXT: xxswapd v1, vs10 -; CHECK-P9-NEXT: vmrglh v0, v1, v0 ; CHECK-P9-NEXT: vmrglw v2, v2, v0 -; CHECK-P9-NEXT: stxv vs2, 0(r3) ; CHECK-P9-NEXT: mffprwz r4, f0 -; CHECK-P9-NEXT: mtfprd f0, r4 -; CHECK-P9-NEXT: xxswapd v5, vs0 -; CHECK-P9-NEXT: vmrglh v4, v4, v5 +; CHECK-P9-NEXT: xxswapd vs0, vs1 +; CHECK-P9-NEXT: mtvsrd v3, r4 +; CHECK-P9-NEXT: xscvspdpn f0, vs0 +; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: xscvspdpn f0, vs1 +; CHECK-P9-NEXT: mtvsrd v4, r4 +; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: vmrghh v3, v4, v3 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: xxsldwi vs0, vs1, vs1, 1 +; CHECK-P9-NEXT: mtvsrd v4, r4 +; CHECK-P9-NEXT: xscvspdpn f0, vs0 +; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: mtvsrd v5, r4 +; CHECK-P9-NEXT: vmrghh v4, v4, v5 ; CHECK-P9-NEXT: vmrglw v3, v4, v3 ; CHECK-P9-NEXT: xxmrgld vs0, v3, v2 ; CHECK-P9-NEXT: stxv vs0, 16(r3) +; CHECK-P9-NEXT: stxv vs2, 0(r3) ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test16elt_signed: diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll index 1f95eda2b1b5..928a19f3a55c 100644 --- a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll @@ -20,12 +20,10 @@ define i16 @test2elt(i64 %a.coerce) local_unnamed_addr #0 { ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: mtvsrd v3, r4 ; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: mtfprd f1, r4 -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: xxswapd v3, vs1 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: vmrglb v2, v3, v2 +; CHECK-P8-NEXT: mtvsrd v2, r3 +; CHECK-P8-NEXT: vmrghb v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; 
CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: clrldi r3, r3, 48 @@ -43,13 +41,11 @@ define i16 @test2elt(i64 %a.coerce) local_unnamed_addr #0 { ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtfprd f1, r3 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: addi r3, r1, -2 -; CHECK-P9-NEXT: xxswapd v2, vs1 -; CHECK-P9-NEXT: xxswapd v3, vs0 -; CHECK-P9-NEXT: vmrglb v2, v3, v2 +; CHECK-P9-NEXT: vmrghb v2, v3, v2 ; CHECK-P9-NEXT: vsldoi v2, v2, v2, 8 ; CHECK-P9-NEXT: stxsihx v2, 0, r3 ; CHECK-P9-NEXT: lhz r3, -2(r1) @@ -97,20 +93,16 @@ define i32 @test4elt(<4 x float> %a) local_unnamed_addr #1 { ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: mtfprd f1, r3 +; CHECK-P8-NEXT: mtvsrd v2, r3 ; CHECK-P8-NEXT: mffprwz r3, f0 ; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: xxswapd v4, vs1 -; CHECK-P8-NEXT: mtfprd f0, r3 +; CHECK-P8-NEXT: mtvsrd v3, r3 ; CHECK-P8-NEXT: mffprwz r3, f3 -; CHECK-P8-NEXT: mtfprd f2, r4 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: mtfprd f3, r3 -; CHECK-P8-NEXT: xxswapd v3, vs2 -; CHECK-P8-NEXT: xxswapd v5, vs3 -; CHECK-P8-NEXT: vmrglb v2, v3, v2 -; CHECK-P8-NEXT: vmrglb v3, v4, v5 -; CHECK-P8-NEXT: vmrglh v2, v3, v2 +; CHECK-P8-NEXT: mtvsrd v4, r4 +; CHECK-P8-NEXT: mtvsrd v5, r3 +; CHECK-P8-NEXT: vmrghb v3, v4, v3 +; CHECK-P8-NEXT: vmrghb v2, v2, v5 +; CHECK-P8-NEXT: vmrglh v2, v2, v3 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mffprwz r3, f0 ; CHECK-P8-NEXT: blr @@ -121,28 +113,24 @@ define i32 @test4elt(<4 x float> %a) local_unnamed_addr #1 { ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 -; CHECK-P9-NEXT: xxswapd v3, vs0 ; CHECK-P9-NEXT: xxswapd vs0, v2 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn 
f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 -; CHECK-P9-NEXT: xxswapd v4, vs0 ; CHECK-P9-NEXT: xscvspdpn f0, v2 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: vmrghb v3, v4, v3 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 -; CHECK-P9-NEXT: vmrglb v3, v4, v3 -; CHECK-P9-NEXT: xxswapd v4, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 1 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: li r3, 0 -; CHECK-P9-NEXT: xxswapd v2, vs0 -; CHECK-P9-NEXT: vmrglb v2, v4, v2 +; CHECK-P9-NEXT: vmrghb v2, v4, v2 ; CHECK-P9-NEXT: vmrglh v2, v2, v3 ; CHECK-P9-NEXT: vextuwrx r3, r3, v2 ; CHECK-P9-NEXT: blr @@ -189,59 +177,51 @@ define i64 @test8elt(<8 x float>* nocapture readonly) local_unnamed_addr #2 { ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: lvx v5, r3, r4 -; CHECK-P8-NEXT: xxswapd vs1, v2 +; CHECK-P8-NEXT: lvx v3, r3, r4 ; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-P8-NEXT: xxsldwi vs2, v5, v5, 3 -; CHECK-P8-NEXT: xscvspdpn f4, v5 -; CHECK-P8-NEXT: xxswapd vs3, v5 -; CHECK-P8-NEXT: xxsldwi vs5, v5, v5, 1 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 +; CHECK-P8-NEXT: xxswapd vs1, v2 +; CHECK-P8-NEXT: xscvspdpn f2, v2 +; CHECK-P8-NEXT: xxsldwi vs4, v2, v2, 1 +; CHECK-P8-NEXT: xxsldwi vs5, v3, v3, 3 +; CHECK-P8-NEXT: xscvspdpn f3, v3 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: xscvspdpn f2, vs2 -; CHECK-P8-NEXT: xscvspdpn f3, vs3 +; CHECK-P8-NEXT: xscvspdpn f1, vs1 +; CHECK-P8-NEXT: xscvspdpn f4, vs4 ; CHECK-P8-NEXT: xscvspdpn f5, vs5 -; CHECK-P8-NEXT: xscvdpsxws f4, f4 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: xscvdpsxws 
f5, f5 -; CHECK-P8-NEXT: mffprwz r4, f4 -; CHECK-P8-NEXT: mffprwz r6, f1 -; CHECK-P8-NEXT: mffprwz r5, f0 -; CHECK-P8-NEXT: mtfprd f1, r6 -; CHECK-P8-NEXT: mtfprd f0, r5 -; CHECK-P8-NEXT: xxswapd v4, vs1 -; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 1 -; CHECK-P8-NEXT: xxswapd v3, vs0 -; CHECK-P8-NEXT: xscvspdpn f0, v2 -; CHECK-P8-NEXT: mtfprd f4, r4 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: xxswapd v1, vs4 -; CHECK-P8-NEXT: vmrglb v2, v4, v3 -; CHECK-P8-NEXT: mtfprd f2, r4 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: mffprwz r4, f5 -; CHECK-P8-NEXT: xxswapd v5, vs2 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: xxswapd v3, vs0 -; CHECK-P8-NEXT: mtfprd f1, r3 +; CHECK-P8-NEXT: xxswapd vs0, v3 +; CHECK-P8-NEXT: mffprwz r4, f1 +; CHECK-P8-NEXT: xxsldwi vs1, v3, v3, 1 +; CHECK-P8-NEXT: mtvsrd v2, r3 +; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: mffprwz r3, f2 +; CHECK-P8-NEXT: xscvdpsxws f2, f4 +; CHECK-P8-NEXT: xscvspdpn f1, vs1 +; CHECK-P8-NEXT: xscvdpsxws f4, f5 +; CHECK-P8-NEXT: mtvsrd v4, r4 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: vmrghb v2, v4, v2 +; CHECK-P8-NEXT: mffprwz r4, f2 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: mtvsrd v3, r3 ; CHECK-P8-NEXT: mffprwz r3, f3 -; CHECK-P8-NEXT: mtfprd f3, r4 -; CHECK-P8-NEXT: xxswapd v4, vs1 -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: xxswapd v6, vs3 -; CHECK-P8-NEXT: xxswapd v0, vs0 -; CHECK-P8-NEXT: vmrglb v3, v3, v4 -; CHECK-P8-NEXT: vmrglb v4, v0, v5 -; CHECK-P8-NEXT: vmrglb v5, v1, v6 +; CHECK-P8-NEXT: mtvsrd v4, r4 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: vmrghb v3, v3, v4 +; CHECK-P8-NEXT: mtvsrd v4, r3 +; CHECK-P8-NEXT: mffprwz r3, f4 +; CHECK-P8-NEXT: mtvsrd v0, r4 +; CHECK-P8-NEXT: mtvsrd v5, r3 +; CHECK-P8-NEXT: mffprwz r3, f1 +; CHECK-P8-NEXT: vmrghb v5, v0, v5 +; CHECK-P8-NEXT: mtvsrd v1, r3 ; CHECK-P8-NEXT: vmrglh 
v2, v3, v2 -; CHECK-P8-NEXT: vmrglh v3, v5, v4 +; CHECK-P8-NEXT: vmrghb v4, v4, v1 +; CHECK-P8-NEXT: vmrglh v3, v4, v5 ; CHECK-P8-NEXT: vmrglw v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mffprd r3, f0 @@ -255,53 +235,45 @@ define i64 @test8elt(<8 x float>* nocapture readonly) local_unnamed_addr #2 { ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: lxv vs0, 16(r3) ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 -; CHECK-P9-NEXT: xxswapd v2, vs2 ; CHECK-P9-NEXT: xxswapd vs2, vs1 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 -; CHECK-P9-NEXT: xxswapd v3, vs2 ; CHECK-P9-NEXT: xscvspdpn f2, vs1 ; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: vmrghb v2, v3, v2 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtfprd f1, r3 -; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 +; CHECK-P9-NEXT: vmrghb v3, v3, v4 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrglb v2, v3, v2 -; CHECK-P9-NEXT: xxswapd v3, vs2 -; CHECK-P9-NEXT: vmrglb v3, v3, v4 ; CHECK-P9-NEXT: vmrglh v2, v3, v2 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtfprd f1, r3 -; CHECK-P9-NEXT: xxswapd v3, vs1 ; CHECK-P9-NEXT: xxswapd vs1, vs0 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtfprd f1, r3 -; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xscvspdpn f1, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; 
CHECK-P9-NEXT: vmrghb v3, v4, v3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtfprd f1, r3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 -; CHECK-P9-NEXT: vmrglb v3, v4, v3 -; CHECK-P9-NEXT: xxswapd v4, vs1 -; CHECK-P9-NEXT: xxswapd v5, vs0 -; CHECK-P9-NEXT: vmrglb v4, v4, v5 +; CHECK-P9-NEXT: mtvsrd v5, r3 +; CHECK-P9-NEXT: vmrghb v4, v4, v5 ; CHECK-P9-NEXT: vmrglh v3, v4, v3 ; CHECK-P9-NEXT: vmrglw v2, v3, v2 ; CHECK-P9-NEXT: mfvsrld r3, v2 @@ -376,117 +348,101 @@ entry: define <16 x i8> @test16elt(<16 x float>* nocapture readonly) local_unnamed_addr #3 { ; CHECK-P8-LABEL: test16elt: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lvx v4, 0, r3 ; CHECK-P8-NEXT: li r4, 16 +; CHECK-P8-NEXT: li r5, 32 ; CHECK-P8-NEXT: lvx v3, r3, r4 -; CHECK-P8-NEXT: li r4, 32 -; CHECK-P8-NEXT: xscvspdpn f2, v2 -; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-P8-NEXT: xscvspdpn f4, v3 -; CHECK-P8-NEXT: xxswapd vs1, v2 -; CHECK-P8-NEXT: xxsldwi vs3, v2, v2, 1 -; CHECK-P8-NEXT: xxsldwi vs5, v3, v3, 3 -; CHECK-P8-NEXT: lvx v2, r3, r4 +; CHECK-P8-NEXT: lvx v2, r3, r5 +; CHECK-P8-NEXT: xxsldwi vs0, v4, v4, 3 +; CHECK-P8-NEXT: xxswapd vs2, v4 +; CHECK-P8-NEXT: xxsldwi vs4, v4, v4, 1 +; CHECK-P8-NEXT: xscvspdpn f1, v4 +; CHECK-P8-NEXT: xscvspdpn f3, v3 +; CHECK-P8-NEXT: xxsldwi vs6, v3, v3, 3 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: xxswapd vs6, v3 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: xxsldwi vs7, v3, v3, 1 -; CHECK-P8-NEXT: xscvspdpn f3, vs3 -; CHECK-P8-NEXT: xxsldwi vs8, v2, v2, 3 -; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: xxswapd vs9, v2 -; CHECK-P8-NEXT: xscvdpsxws f4, f4 -; CHECK-P8-NEXT: xscvspdpn f5, vs5 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: xxswapd vs7, v3 +; CHECK-P8-NEXT: xscvspdpn f2, vs2 +; CHECK-P8-NEXT: xxsldwi vs8, v3, v3, 1 +; CHECK-P8-NEXT: xscvspdpn f4, vs4 +; CHECK-P8-NEXT: xxsldwi vs9, v2, v2, 
3 ; CHECK-P8-NEXT: xscvspdpn f6, vs6 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mffprwz r4, f2 ; CHECK-P8-NEXT: xscvspdpn f7, vs7 -; CHECK-P8-NEXT: mtfprd f2, r4 -; CHECK-P8-NEXT: mffprwz r4, f4 -; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: xscvspdpn f8, vs8 -; CHECK-P8-NEXT: mtfprd f4, r4 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: xscvdpsxws f0, f5 -; CHECK-P8-NEXT: xxswapd v0, vs4 -; CHECK-P8-NEXT: xscvspdpn f9, vs9 -; CHECK-P8-NEXT: mtfprd f5, r4 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: xscvdpsxws f1, f6 -; CHECK-P8-NEXT: xxswapd v3, vs5 -; CHECK-P8-NEXT: mtfprd f6, r4 -; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: xscvdpsxws f3, f7 -; CHECK-P8-NEXT: xxswapd v4, vs6 -; CHECK-P8-NEXT: mtfprd f7, r4 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: xscvdpsxws f0, f8 -; CHECK-P8-NEXT: xxswapd v5, vs7 -; CHECK-P8-NEXT: mtfprd f8, r4 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: xscvdpsxws f1, f9 -; CHECK-P8-NEXT: xxswapd v1, vs8 -; CHECK-P8-NEXT: mtfprd f9, r4 -; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: vmrglb v3, v4, v3 -; CHECK-P8-NEXT: xxswapd v4, vs2 -; CHECK-P8-NEXT: mtfprd f3, r4 -; CHECK-P8-NEXT: xxswapd v6, vs9 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: xscvspdpn f0, v2 -; CHECK-P8-NEXT: xxswapd v7, vs3 -; CHECK-P8-NEXT: mtfprd f5, r4 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: vmrglb v4, v4, v5 -; CHECK-P8-NEXT: xxswapd v5, vs5 -; CHECK-P8-NEXT: mtfprd f1, r4 -; CHECK-P8-NEXT: li r4, 48 -; CHECK-P8-NEXT: lvx v9, r3, r4 -; CHECK-P8-NEXT: vmrglb v1, v6, v1 -; CHECK-P8-NEXT: xxswapd v8, vs1 -; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 1 -; CHECK-P8-NEXT: xxsldwi vs2, v9, v9, 3 -; CHECK-P8-NEXT: xscvspdpn f4, v9 -; CHECK-P8-NEXT: xxswapd vs3, v9 -; CHECK-P8-NEXT: xxsldwi vs5, v9, v9, 1 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: xscvspdpn f2, vs2 -; CHECK-P8-NEXT: xscvspdpn f3, vs3 -; CHECK-P8-NEXT: xscvspdpn f5, vs5 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; 
CHECK-P8-NEXT: xscvdpsxws f4, f4 +; CHECK-P8-NEXT: xscvspdpn f8, vs8 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: xscvspdpn f9, vs9 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: mffprwz r5, f2 +; CHECK-P8-NEXT: mtvsrd v3, r4 +; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: mffprwz r4, f1 +; CHECK-P8-NEXT: mtvsrd v4, r5 +; CHECK-P8-NEXT: mffprwz r5, f4 +; CHECK-P8-NEXT: xscvdpsxws f1, f6 +; CHECK-P8-NEXT: vmrghb v3, v4, v3 +; CHECK-P8-NEXT: mtvsrd v4, r5 +; CHECK-P8-NEXT: mffprwz r5, f3 +; CHECK-P8-NEXT: xscvdpsxws f3, f7 +; CHECK-P8-NEXT: xscvdpsxws f4, f8 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: mtvsrd v5, r4 +; CHECK-P8-NEXT: li r4, 48 +; CHECK-P8-NEXT: lvx v0, r3, r4 +; CHECK-P8-NEXT: mffprwz r3, f1 +; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 1 +; CHECK-P8-NEXT: xscvspdpn f5, v2 +; CHECK-P8-NEXT: mffprwz r4, f3 +; CHECK-P8-NEXT: xxsldwi vs3, v0, v0, 3 +; CHECK-P8-NEXT: mtvsrd v1, r3 +; CHECK-P8-NEXT: mffprwz r3, f4 +; CHECK-P8-NEXT: xxswapd vs4, v0 +; CHECK-P8-NEXT: xscvspdpn f1, vs1 +; CHECK-P8-NEXT: mtvsrd v7, r3 +; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: xxsldwi vs0, v0, v0, 1 +; CHECK-P8-NEXT: xscvspdpn f2, v0 +; CHECK-P8-NEXT: xscvspdpn f3, vs3 +; CHECK-P8-NEXT: xscvdpsxws f6, f9 +; CHECK-P8-NEXT: xscvspdpn f4, vs4 +; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: xscvdpsxws f5, f5 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mffprwz r4, f4 -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: mtfprd f4, r4 -; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: xxswapd v9, vs4 -; CHECK-P8-NEXT: mtfprd f1, r3 -; CHECK-P8-NEXT: mffprwz r3, f3 -; CHECK-P8-NEXT: mtfprd f2, r4 -; CHECK-P8-NEXT: xxswapd v6, vs1 +; CHECK-P8-NEXT: mtvsrd v6, r4 +; CHECK-P8-NEXT: mffprwz r4, f6 +; CHECK-P8-NEXT: xscvdpsxws f4, f4 +; 
CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: vmrghb v2, v6, v1 +; CHECK-P8-NEXT: mtvsrd v1, r4 ; CHECK-P8-NEXT: mffprwz r4, f5 -; CHECK-P8-NEXT: vmrglb v2, v0, v7 -; CHECK-P8-NEXT: xxswapd v0, vs0 -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: xxswapd v7, vs2 -; CHECK-P8-NEXT: mtfprd f3, r4 -; CHECK-P8-NEXT: vmrglb v5, v8, v5 -; CHECK-P8-NEXT: xxswapd v8, vs0 -; CHECK-P8-NEXT: xxswapd v10, vs3 -; CHECK-P8-NEXT: vmrglb v0, v0, v6 +; CHECK-P8-NEXT: mtvsrd v6, r3 +; CHECK-P8-NEXT: mffprwz r3, f1 +; CHECK-P8-NEXT: vmrghb v4, v5, v4 +; CHECK-P8-NEXT: mtvsrd v5, r5 +; CHECK-P8-NEXT: vmrghb v0, v6, v1 +; CHECK-P8-NEXT: mtvsrd v1, r4 +; CHECK-P8-NEXT: mffprwz r4, f2 +; CHECK-P8-NEXT: mtvsrd v6, r3 +; CHECK-P8-NEXT: mffprwz r3, f3 +; CHECK-P8-NEXT: vmrghb v5, v5, v7 +; CHECK-P8-NEXT: vmrghb v1, v1, v6 +; CHECK-P8-NEXT: mtvsrd v6, r4 +; CHECK-P8-NEXT: mffprwz r4, f4 +; CHECK-P8-NEXT: mtvsrd v7, r3 +; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: mtvsrd v8, r4 +; CHECK-P8-NEXT: mtvsrd v9, r3 +; CHECK-P8-NEXT: vmrghb v7, v8, v7 +; CHECK-P8-NEXT: vmrghb v6, v6, v9 ; CHECK-P8-NEXT: vmrglh v3, v4, v3 -; CHECK-P8-NEXT: vmrglb v6, v8, v7 -; CHECK-P8-NEXT: vmrglb v7, v9, v10 -; CHECK-P8-NEXT: vmrglh v2, v2, v1 -; CHECK-P8-NEXT: vmrglh v4, v0, v5 -; CHECK-P8-NEXT: vmrglh v5, v7, v6 +; CHECK-P8-NEXT: vmrglh v2, v5, v2 +; CHECK-P8-NEXT: vmrglh v4, v1, v0 +; CHECK-P8-NEXT: vmrglh v5, v6, v7 ; CHECK-P8-NEXT: vmrglw v2, v2, v3 ; CHECK-P8-NEXT: vmrglw v3, v5, v4 ; CHECK-P8-NEXT: xxmrgld v2, v3, v2 @@ -494,114 +450,98 @@ define <16 x i8> @test16elt(<16 x float>* nocapture readonly) local_unnamed_addr ; ; CHECK-P9-LABEL: test16elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs2, 0(r3) -; CHECK-P9-NEXT: xxsldwi vs3, vs2, vs2, 3 -; CHECK-P9-NEXT: xscvspdpn f3, vs3 -; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: lxv vs3, 0(r3) +; CHECK-P9-NEXT: xxsldwi vs4, vs3, vs3, 3 +; CHECK-P9-NEXT: xscvspdpn f4, vs4 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 ; CHECK-P9-NEXT: lxv vs0, 
48(r3) ; CHECK-P9-NEXT: lxv vs1, 32(r3) -; CHECK-P9-NEXT: lxv vs4, 16(r3) +; CHECK-P9-NEXT: lxv vs2, 16(r3) +; CHECK-P9-NEXT: mffprwz r3, f4 +; CHECK-P9-NEXT: xxswapd vs4, vs3 +; CHECK-P9-NEXT: mtvsrd v2, r3 +; CHECK-P9-NEXT: xscvspdpn f4, vs4 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: mffprwz r3, f4 +; CHECK-P9-NEXT: xscvspdpn f4, vs3 +; CHECK-P9-NEXT: xxsldwi vs3, vs3, vs3, 1 +; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: xscvspdpn f3, vs3 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: vmrghb v2, v3, v2 +; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: mffprwz r3, f4 +; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mffprwz r3, f3 +; CHECK-P9-NEXT: xxsldwi vs3, vs2, vs2, 3 +; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: xscvspdpn f3, vs3 +; CHECK-P9-NEXT: vmrghb v3, v3, v4 +; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: vmrglh v2, v3, v2 ; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: mtfprd f3, r3 -; CHECK-P9-NEXT: xxswapd v2, vs3 ; CHECK-P9-NEXT: xxswapd vs3, vs2 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 ; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: mtfprd f3, r3 -; CHECK-P9-NEXT: xxswapd v3, vs3 ; CHECK-P9-NEXT: xscvspdpn f3, vs2 ; CHECK-P9-NEXT: xxsldwi vs2, vs2, vs2, 1 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: vmrghb v3, v4, v3 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: mtfprd f3, r3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 -; CHECK-P9-NEXT: xxswapd v4, vs2 -; CHECK-P9-NEXT: xxsldwi vs2, vs4, vs4, 3 -; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrglb v2, v3, v2 -; CHECK-P9-NEXT: xxswapd v3, vs3 -; CHECK-P9-NEXT: vmrglb v3, v3, v4 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 -; 
CHECK-P9-NEXT: xxswapd v3, vs2 -; CHECK-P9-NEXT: xxswapd vs2, vs4 -; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 -; CHECK-P9-NEXT: xxswapd v4, vs2 -; CHECK-P9-NEXT: xscvspdpn f2, vs4 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 -; CHECK-P9-NEXT: vmrglb v3, v4, v3 -; CHECK-P9-NEXT: xxswapd v4, vs2 -; CHECK-P9-NEXT: xxsldwi vs2, vs4, vs4, 1 -; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 -; CHECK-P9-NEXT: xxswapd v5, vs2 ; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3 +; CHECK-P9-NEXT: mtvsrd v5, r3 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 +; CHECK-P9-NEXT: vmrghb v4, v4, v5 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrglb v4, v4, v5 ; CHECK-P9-NEXT: vmrglh v3, v4, v3 ; CHECK-P9-NEXT: vmrglw v2, v3, v2 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 -; CHECK-P9-NEXT: xxswapd v3, vs2 ; CHECK-P9-NEXT: xxswapd vs2, vs1 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 -; CHECK-P9-NEXT: xxswapd v4, vs2 ; CHECK-P9-NEXT: xscvspdpn f2, vs1 ; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: vmrghb v3, v4, v3 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtfprd f1, r3 -; CHECK-P9-NEXT: xxswapd v5, vs1 ; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 +; CHECK-P9-NEXT: mtvsrd v5, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 +; CHECK-P9-NEXT: vmrghb v4, v4, v5 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrglb v3, v4, v3 -; CHECK-P9-NEXT: xxswapd v4, vs2 -; CHECK-P9-NEXT: vmrglb v4, 
v4, v5 ; CHECK-P9-NEXT: vmrglh v3, v4, v3 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtfprd f1, r3 -; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xxswapd vs1, vs0 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtfprd f1, r3 -; CHECK-P9-NEXT: xxswapd v5, vs1 ; CHECK-P9-NEXT: xscvspdpn f1, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 +; CHECK-P9-NEXT: mtvsrd v5, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: vmrghb v4, v5, v4 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtfprd f1, r3 +; CHECK-P9-NEXT: mtvsrd v5, r3 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 -; CHECK-P9-NEXT: vmrglb v4, v5, v4 -; CHECK-P9-NEXT: xxswapd v5, vs1 -; CHECK-P9-NEXT: xxswapd v0, vs0 -; CHECK-P9-NEXT: vmrglb v5, v5, v0 +; CHECK-P9-NEXT: mtvsrd v0, r3 +; CHECK-P9-NEXT: vmrghb v5, v5, v0 ; CHECK-P9-NEXT: vmrglh v4, v5, v4 ; CHECK-P9-NEXT: vmrglw v3, v4, v3 ; CHECK-P9-NEXT: xxmrgld v2, v3, v2 @@ -738,12 +678,10 @@ define i16 @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 { ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: mtvsrd v3, r4 ; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: mtfprd f1, r4 -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: xxswapd v3, vs1 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: vmrglb v2, v3, v2 +; CHECK-P8-NEXT: mtvsrd v2, r3 +; CHECK-P8-NEXT: vmrghb v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: clrldi r3, r3, 48 @@ -761,13 +699,11 @@ define i16 @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 { ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtfprd f1, r3 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd 
f0, r3 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: addi r3, r1, -2 -; CHECK-P9-NEXT: xxswapd v2, vs1 -; CHECK-P9-NEXT: xxswapd v3, vs0 -; CHECK-P9-NEXT: vmrglb v2, v3, v2 +; CHECK-P9-NEXT: vmrghb v2, v3, v2 ; CHECK-P9-NEXT: vsldoi v2, v2, v2, 8 ; CHECK-P9-NEXT: stxsihx v2, 0, r3 ; CHECK-P9-NEXT: lhz r3, -2(r1) @@ -815,20 +751,16 @@ define i32 @test4elt_signed(<4 x float> %a) local_unnamed_addr #1 { ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: mtfprd f1, r3 +; CHECK-P8-NEXT: mtvsrd v2, r3 ; CHECK-P8-NEXT: mffprwz r3, f0 ; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: xxswapd v4, vs1 -; CHECK-P8-NEXT: mtfprd f0, r3 +; CHECK-P8-NEXT: mtvsrd v3, r3 ; CHECK-P8-NEXT: mffprwz r3, f3 -; CHECK-P8-NEXT: mtfprd f2, r4 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: mtfprd f3, r3 -; CHECK-P8-NEXT: xxswapd v3, vs2 -; CHECK-P8-NEXT: xxswapd v5, vs3 -; CHECK-P8-NEXT: vmrglb v2, v3, v2 -; CHECK-P8-NEXT: vmrglb v3, v4, v5 -; CHECK-P8-NEXT: vmrglh v2, v3, v2 +; CHECK-P8-NEXT: mtvsrd v4, r4 +; CHECK-P8-NEXT: mtvsrd v5, r3 +; CHECK-P8-NEXT: vmrghb v3, v4, v3 +; CHECK-P8-NEXT: vmrghb v2, v2, v5 +; CHECK-P8-NEXT: vmrglh v2, v2, v3 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mffprwz r3, f0 ; CHECK-P8-NEXT: blr @@ -839,28 +771,24 @@ define i32 @test4elt_signed(<4 x float> %a) local_unnamed_addr #1 { ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 -; CHECK-P9-NEXT: xxswapd v3, vs0 ; CHECK-P9-NEXT: xxswapd vs0, v2 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 -; CHECK-P9-NEXT: xxswapd v4, vs0 ; CHECK-P9-NEXT: xscvspdpn f0, v2 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: vmrghb v3, v4, v3 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 -; 
CHECK-P9-NEXT: vmrglb v3, v4, v3 -; CHECK-P9-NEXT: xxswapd v4, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 1 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: li r3, 0 -; CHECK-P9-NEXT: xxswapd v2, vs0 -; CHECK-P9-NEXT: vmrglb v2, v4, v2 +; CHECK-P9-NEXT: vmrghb v2, v4, v2 ; CHECK-P9-NEXT: vmrglh v2, v2, v3 ; CHECK-P9-NEXT: vextuwrx r3, r3, v2 ; CHECK-P9-NEXT: blr @@ -907,59 +835,51 @@ define i64 @test8elt_signed(<8 x float>* nocapture readonly) local_unnamed_addr ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: lvx v5, r3, r4 -; CHECK-P8-NEXT: xxswapd vs1, v2 +; CHECK-P8-NEXT: lvx v3, r3, r4 ; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-P8-NEXT: xxsldwi vs2, v5, v5, 3 -; CHECK-P8-NEXT: xscvspdpn f4, v5 -; CHECK-P8-NEXT: xxswapd vs3, v5 -; CHECK-P8-NEXT: xxsldwi vs5, v5, v5, 1 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 +; CHECK-P8-NEXT: xxswapd vs1, v2 +; CHECK-P8-NEXT: xscvspdpn f2, v2 +; CHECK-P8-NEXT: xxsldwi vs4, v2, v2, 1 +; CHECK-P8-NEXT: xxsldwi vs5, v3, v3, 3 +; CHECK-P8-NEXT: xscvspdpn f3, v3 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: xscvspdpn f2, vs2 -; CHECK-P8-NEXT: xscvspdpn f3, vs3 +; CHECK-P8-NEXT: xscvspdpn f1, vs1 +; CHECK-P8-NEXT: xscvspdpn f4, vs4 ; CHECK-P8-NEXT: xscvspdpn f5, vs5 -; CHECK-P8-NEXT: xscvdpsxws f4, f4 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: mffprwz r4, f4 -; CHECK-P8-NEXT: mffprwz r6, f1 -; CHECK-P8-NEXT: mffprwz r5, f0 -; CHECK-P8-NEXT: mtfprd f1, r6 -; CHECK-P8-NEXT: mtfprd f0, r5 -; CHECK-P8-NEXT: xxswapd v4, vs1 -; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 1 -; CHECK-P8-NEXT: xxswapd v3, vs0 -; CHECK-P8-NEXT: xscvspdpn f0, v2 -; CHECK-P8-NEXT: mtfprd f4, r4 
-; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: xxswapd v1, vs4 -; CHECK-P8-NEXT: vmrglb v2, v4, v3 -; CHECK-P8-NEXT: mtfprd f2, r4 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: mffprwz r4, f5 -; CHECK-P8-NEXT: xxswapd v5, vs2 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: xxswapd v3, vs0 -; CHECK-P8-NEXT: mtfprd f1, r3 +; CHECK-P8-NEXT: xxswapd vs0, v3 +; CHECK-P8-NEXT: mffprwz r4, f1 +; CHECK-P8-NEXT: xxsldwi vs1, v3, v3, 1 +; CHECK-P8-NEXT: mtvsrd v2, r3 +; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: mffprwz r3, f2 +; CHECK-P8-NEXT: xscvdpsxws f2, f4 +; CHECK-P8-NEXT: xscvspdpn f1, vs1 +; CHECK-P8-NEXT: xscvdpsxws f4, f5 +; CHECK-P8-NEXT: mtvsrd v4, r4 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: vmrghb v2, v4, v2 +; CHECK-P8-NEXT: mffprwz r4, f2 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: mtvsrd v3, r3 ; CHECK-P8-NEXT: mffprwz r3, f3 -; CHECK-P8-NEXT: mtfprd f3, r4 -; CHECK-P8-NEXT: xxswapd v4, vs1 -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: xxswapd v6, vs3 -; CHECK-P8-NEXT: xxswapd v0, vs0 -; CHECK-P8-NEXT: vmrglb v3, v3, v4 -; CHECK-P8-NEXT: vmrglb v4, v0, v5 -; CHECK-P8-NEXT: vmrglb v5, v1, v6 +; CHECK-P8-NEXT: mtvsrd v4, r4 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: vmrghb v3, v3, v4 +; CHECK-P8-NEXT: mtvsrd v4, r3 +; CHECK-P8-NEXT: mffprwz r3, f4 +; CHECK-P8-NEXT: mtvsrd v0, r4 +; CHECK-P8-NEXT: mtvsrd v5, r3 +; CHECK-P8-NEXT: mffprwz r3, f1 +; CHECK-P8-NEXT: vmrghb v5, v0, v5 +; CHECK-P8-NEXT: mtvsrd v1, r3 ; CHECK-P8-NEXT: vmrglh v2, v3, v2 -; CHECK-P8-NEXT: vmrglh v3, v5, v4 +; CHECK-P8-NEXT: vmrghb v4, v4, v1 +; CHECK-P8-NEXT: vmrglh v3, v4, v5 ; CHECK-P8-NEXT: vmrglw v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mffprd r3, f0 @@ -973,53 +893,45 @@ define i64 @test8elt_signed(<8 x float>* nocapture readonly) local_unnamed_addr ; CHECK-P9-NEXT: 
xscvdpsxws f2, f2 ; CHECK-P9-NEXT: lxv vs0, 16(r3) ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 -; CHECK-P9-NEXT: xxswapd v2, vs2 ; CHECK-P9-NEXT: xxswapd vs2, vs1 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 -; CHECK-P9-NEXT: xxswapd v3, vs2 ; CHECK-P9-NEXT: xscvspdpn f2, vs1 ; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: vmrghb v2, v3, v2 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtfprd f1, r3 -; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 +; CHECK-P9-NEXT: vmrghb v3, v3, v4 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrglb v2, v3, v2 -; CHECK-P9-NEXT: xxswapd v3, vs2 -; CHECK-P9-NEXT: vmrglb v3, v3, v4 ; CHECK-P9-NEXT: vmrglh v2, v3, v2 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtfprd f1, r3 -; CHECK-P9-NEXT: xxswapd v3, vs1 ; CHECK-P9-NEXT: xxswapd vs1, vs0 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtfprd f1, r3 -; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xscvspdpn f1, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: vmrghb v3, v4, v3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtfprd f1, r3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 -; CHECK-P9-NEXT: vmrglb v3, v4, v3 -; CHECK-P9-NEXT: xxswapd v4, vs1 -; CHECK-P9-NEXT: xxswapd v5, vs0 -; 
CHECK-P9-NEXT: vmrglb v4, v4, v5 +; CHECK-P9-NEXT: mtvsrd v5, r3 +; CHECK-P9-NEXT: vmrghb v4, v4, v5 ; CHECK-P9-NEXT: vmrglh v3, v4, v3 ; CHECK-P9-NEXT: vmrglw v2, v3, v2 ; CHECK-P9-NEXT: mfvsrld r3, v2 @@ -1094,117 +1006,101 @@ entry: define <16 x i8> @test16elt_signed(<16 x float>* nocapture readonly) local_unnamed_addr #3 { ; CHECK-P8-LABEL: test16elt_signed: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lvx v4, 0, r3 ; CHECK-P8-NEXT: li r4, 16 +; CHECK-P8-NEXT: li r5, 32 ; CHECK-P8-NEXT: lvx v3, r3, r4 -; CHECK-P8-NEXT: li r4, 32 -; CHECK-P8-NEXT: xscvspdpn f2, v2 -; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-P8-NEXT: xscvspdpn f4, v3 -; CHECK-P8-NEXT: xxswapd vs1, v2 -; CHECK-P8-NEXT: xxsldwi vs3, v2, v2, 1 -; CHECK-P8-NEXT: xxsldwi vs5, v3, v3, 3 -; CHECK-P8-NEXT: lvx v2, r3, r4 +; CHECK-P8-NEXT: lvx v2, r3, r5 +; CHECK-P8-NEXT: xxsldwi vs0, v4, v4, 3 +; CHECK-P8-NEXT: xxswapd vs2, v4 +; CHECK-P8-NEXT: xxsldwi vs4, v4, v4, 1 +; CHECK-P8-NEXT: xscvspdpn f1, v4 +; CHECK-P8-NEXT: xscvspdpn f3, v3 +; CHECK-P8-NEXT: xxsldwi vs6, v3, v3, 3 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: xxswapd vs6, v3 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: xxsldwi vs7, v3, v3, 1 -; CHECK-P8-NEXT: xscvspdpn f3, vs3 -; CHECK-P8-NEXT: xxsldwi vs8, v2, v2, 3 -; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: xxswapd vs9, v2 -; CHECK-P8-NEXT: xscvdpsxws f4, f4 -; CHECK-P8-NEXT: xscvspdpn f5, vs5 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: xxswapd vs7, v3 +; CHECK-P8-NEXT: xscvspdpn f2, vs2 +; CHECK-P8-NEXT: xxsldwi vs8, v3, v3, 1 +; CHECK-P8-NEXT: xscvspdpn f4, vs4 +; CHECK-P8-NEXT: xxsldwi vs9, v2, v2, 3 ; CHECK-P8-NEXT: xscvspdpn f6, vs6 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mffprwz r4, f2 ; CHECK-P8-NEXT: xscvspdpn f7, vs7 -; CHECK-P8-NEXT: mtfprd f2, r4 -; CHECK-P8-NEXT: mffprwz r4, f4 -; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: xscvspdpn f8, vs8 -; CHECK-P8-NEXT: mtfprd f4, r4 -; 
CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: xscvdpsxws f0, f5 -; CHECK-P8-NEXT: xxswapd v0, vs4 -; CHECK-P8-NEXT: xscvspdpn f9, vs9 -; CHECK-P8-NEXT: mtfprd f5, r4 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: xscvdpsxws f1, f6 -; CHECK-P8-NEXT: xxswapd v3, vs5 -; CHECK-P8-NEXT: mtfprd f6, r4 -; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: xscvdpsxws f3, f7 -; CHECK-P8-NEXT: xxswapd v4, vs6 -; CHECK-P8-NEXT: mtfprd f7, r4 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: xscvdpsxws f0, f8 -; CHECK-P8-NEXT: xxswapd v5, vs7 -; CHECK-P8-NEXT: mtfprd f8, r4 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: xscvdpsxws f1, f9 -; CHECK-P8-NEXT: xxswapd v1, vs8 -; CHECK-P8-NEXT: mtfprd f9, r4 -; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: vmrglb v3, v4, v3 -; CHECK-P8-NEXT: xxswapd v4, vs2 -; CHECK-P8-NEXT: mtfprd f3, r4 -; CHECK-P8-NEXT: xxswapd v6, vs9 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: xscvspdpn f0, v2 -; CHECK-P8-NEXT: xxswapd v7, vs3 -; CHECK-P8-NEXT: mtfprd f5, r4 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: vmrglb v4, v4, v5 -; CHECK-P8-NEXT: xxswapd v5, vs5 -; CHECK-P8-NEXT: mtfprd f1, r4 -; CHECK-P8-NEXT: li r4, 48 -; CHECK-P8-NEXT: lvx v9, r3, r4 -; CHECK-P8-NEXT: vmrglb v1, v6, v1 -; CHECK-P8-NEXT: xxswapd v8, vs1 -; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 1 -; CHECK-P8-NEXT: xxsldwi vs2, v9, v9, 3 -; CHECK-P8-NEXT: xscvspdpn f4, v9 -; CHECK-P8-NEXT: xxswapd vs3, v9 -; CHECK-P8-NEXT: xxsldwi vs5, v9, v9, 1 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: xscvspdpn f2, vs2 -; CHECK-P8-NEXT: xscvspdpn f3, vs3 -; CHECK-P8-NEXT: xscvspdpn f5, vs5 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f4, f4 +; CHECK-P8-NEXT: xscvspdpn f8, vs8 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: xscvspdpn f9, vs9 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: mffprwz r5, f2 +; CHECK-P8-NEXT: mtvsrd v3, r4 +; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; 
CHECK-P8-NEXT: mffprwz r4, f1 +; CHECK-P8-NEXT: mtvsrd v4, r5 +; CHECK-P8-NEXT: mffprwz r5, f4 +; CHECK-P8-NEXT: xscvdpsxws f1, f6 +; CHECK-P8-NEXT: vmrghb v3, v4, v3 +; CHECK-P8-NEXT: mtvsrd v4, r5 +; CHECK-P8-NEXT: mffprwz r5, f3 +; CHECK-P8-NEXT: xscvdpsxws f3, f7 +; CHECK-P8-NEXT: xscvdpsxws f4, f8 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: mtvsrd v5, r4 +; CHECK-P8-NEXT: li r4, 48 +; CHECK-P8-NEXT: lvx v0, r3, r4 +; CHECK-P8-NEXT: mffprwz r3, f1 +; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 1 +; CHECK-P8-NEXT: xscvspdpn f5, v2 +; CHECK-P8-NEXT: mffprwz r4, f3 +; CHECK-P8-NEXT: xxsldwi vs3, v0, v0, 3 +; CHECK-P8-NEXT: mtvsrd v1, r3 +; CHECK-P8-NEXT: mffprwz r3, f4 +; CHECK-P8-NEXT: xxswapd vs4, v0 +; CHECK-P8-NEXT: xscvspdpn f1, vs1 +; CHECK-P8-NEXT: mtvsrd v7, r3 +; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: xxsldwi vs0, v0, v0, 1 +; CHECK-P8-NEXT: xscvspdpn f2, v0 +; CHECK-P8-NEXT: xscvspdpn f3, vs3 +; CHECK-P8-NEXT: xscvdpsxws f6, f9 +; CHECK-P8-NEXT: xscvspdpn f4, vs4 +; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: xscvdpsxws f5, f5 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mffprwz r4, f4 -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: mtfprd f4, r4 -; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: xxswapd v9, vs4 -; CHECK-P8-NEXT: mtfprd f1, r3 -; CHECK-P8-NEXT: mffprwz r3, f3 -; CHECK-P8-NEXT: mtfprd f2, r4 -; CHECK-P8-NEXT: xxswapd v6, vs1 +; CHECK-P8-NEXT: mtvsrd v6, r4 +; CHECK-P8-NEXT: mffprwz r4, f6 +; CHECK-P8-NEXT: xscvdpsxws f4, f4 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: vmrghb v2, v6, v1 +; CHECK-P8-NEXT: mtvsrd v1, r4 ; CHECK-P8-NEXT: mffprwz r4, f5 -; CHECK-P8-NEXT: vmrglb v2, v0, v7 -; CHECK-P8-NEXT: xxswapd v0, vs0 -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: xxswapd v7, vs2 -; CHECK-P8-NEXT: mtfprd f3, r4 -; CHECK-P8-NEXT: 
vmrglb v5, v8, v5 -; CHECK-P8-NEXT: xxswapd v8, vs0 -; CHECK-P8-NEXT: xxswapd v10, vs3 -; CHECK-P8-NEXT: vmrglb v0, v0, v6 +; CHECK-P8-NEXT: mtvsrd v6, r3 +; CHECK-P8-NEXT: mffprwz r3, f1 +; CHECK-P8-NEXT: vmrghb v4, v5, v4 +; CHECK-P8-NEXT: mtvsrd v5, r5 +; CHECK-P8-NEXT: vmrghb v0, v6, v1 +; CHECK-P8-NEXT: mtvsrd v1, r4 +; CHECK-P8-NEXT: mffprwz r4, f2 +; CHECK-P8-NEXT: mtvsrd v6, r3 +; CHECK-P8-NEXT: mffprwz r3, f3 +; CHECK-P8-NEXT: vmrghb v5, v5, v7 +; CHECK-P8-NEXT: vmrghb v1, v1, v6 +; CHECK-P8-NEXT: mtvsrd v6, r4 +; CHECK-P8-NEXT: mffprwz r4, f4 +; CHECK-P8-NEXT: mtvsrd v7, r3 +; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: mtvsrd v8, r4 +; CHECK-P8-NEXT: mtvsrd v9, r3 +; CHECK-P8-NEXT: vmrghb v7, v8, v7 +; CHECK-P8-NEXT: vmrghb v6, v6, v9 ; CHECK-P8-NEXT: vmrglh v3, v4, v3 -; CHECK-P8-NEXT: vmrglb v6, v8, v7 -; CHECK-P8-NEXT: vmrglb v7, v9, v10 -; CHECK-P8-NEXT: vmrglh v2, v2, v1 -; CHECK-P8-NEXT: vmrglh v4, v0, v5 -; CHECK-P8-NEXT: vmrglh v5, v7, v6 +; CHECK-P8-NEXT: vmrglh v2, v5, v2 +; CHECK-P8-NEXT: vmrglh v4, v1, v0 +; CHECK-P8-NEXT: vmrglh v5, v6, v7 ; CHECK-P8-NEXT: vmrglw v2, v2, v3 ; CHECK-P8-NEXT: vmrglw v3, v5, v4 ; CHECK-P8-NEXT: xxmrgld v2, v3, v2 @@ -1212,114 +1108,98 @@ define <16 x i8> @test16elt_signed(<16 x float>* nocapture readonly) local_unnam ; ; CHECK-P9-LABEL: test16elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs2, 0(r3) -; CHECK-P9-NEXT: xxsldwi vs3, vs2, vs2, 3 -; CHECK-P9-NEXT: xscvspdpn f3, vs3 -; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: lxv vs3, 0(r3) +; CHECK-P9-NEXT: xxsldwi vs4, vs3, vs3, 3 +; CHECK-P9-NEXT: xscvspdpn f4, vs4 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 ; CHECK-P9-NEXT: lxv vs0, 48(r3) ; CHECK-P9-NEXT: lxv vs1, 32(r3) -; CHECK-P9-NEXT: lxv vs4, 16(r3) +; CHECK-P9-NEXT: lxv vs2, 16(r3) +; CHECK-P9-NEXT: mffprwz r3, f4 +; CHECK-P9-NEXT: xxswapd vs4, vs3 +; CHECK-P9-NEXT: mtvsrd v2, r3 +; CHECK-P9-NEXT: xscvspdpn f4, vs4 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: mffprwz r3, f4 
+; CHECK-P9-NEXT: xscvspdpn f4, vs3 +; CHECK-P9-NEXT: xxsldwi vs3, vs3, vs3, 1 +; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: xscvspdpn f3, vs3 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: vmrghb v2, v3, v2 +; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: mffprwz r3, f4 +; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mffprwz r3, f3 +; CHECK-P9-NEXT: xxsldwi vs3, vs2, vs2, 3 +; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: xscvspdpn f3, vs3 +; CHECK-P9-NEXT: vmrghb v3, v3, v4 +; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: vmrglh v2, v3, v2 ; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: mtfprd f3, r3 -; CHECK-P9-NEXT: xxswapd v2, vs3 ; CHECK-P9-NEXT: xxswapd vs3, vs2 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 ; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: mtfprd f3, r3 -; CHECK-P9-NEXT: xxswapd v3, vs3 ; CHECK-P9-NEXT: xscvspdpn f3, vs2 ; CHECK-P9-NEXT: xxsldwi vs2, vs2, vs2, 1 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: vmrghb v3, v4, v3 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: mtfprd f3, r3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 -; CHECK-P9-NEXT: xxswapd v4, vs2 -; CHECK-P9-NEXT: xxsldwi vs2, vs4, vs4, 3 -; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrglb v2, v3, v2 -; CHECK-P9-NEXT: xxswapd v3, vs3 -; CHECK-P9-NEXT: vmrglb v3, v3, v4 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 -; CHECK-P9-NEXT: xxswapd v3, vs2 -; CHECK-P9-NEXT: xxswapd vs2, vs4 -; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 -; CHECK-P9-NEXT: xxswapd v4, vs2 -; CHECK-P9-NEXT: xscvspdpn f2, vs4 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: 
mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 -; CHECK-P9-NEXT: vmrglb v3, v4, v3 -; CHECK-P9-NEXT: xxswapd v4, vs2 -; CHECK-P9-NEXT: xxsldwi vs2, vs4, vs4, 1 -; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 -; CHECK-P9-NEXT: xxswapd v5, vs2 ; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3 +; CHECK-P9-NEXT: mtvsrd v5, r3 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 +; CHECK-P9-NEXT: vmrghb v4, v4, v5 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrglb v4, v4, v5 ; CHECK-P9-NEXT: vmrglh v3, v4, v3 ; CHECK-P9-NEXT: vmrglw v2, v3, v2 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 -; CHECK-P9-NEXT: xxswapd v3, vs2 ; CHECK-P9-NEXT: xxswapd vs2, vs1 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 -; CHECK-P9-NEXT: xxswapd v4, vs2 ; CHECK-P9-NEXT: xscvspdpn f2, vs1 ; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: vmrghb v3, v4, v3 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtfprd f1, r3 -; CHECK-P9-NEXT: xxswapd v5, vs1 ; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 +; CHECK-P9-NEXT: mtvsrd v5, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 +; CHECK-P9-NEXT: vmrghb v4, v4, v5 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrglb v3, v4, v3 -; CHECK-P9-NEXT: xxswapd v4, vs2 -; CHECK-P9-NEXT: vmrglb v4, v4, v5 ; CHECK-P9-NEXT: vmrglh v3, v4, v3 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtfprd f1, r3 -; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xxswapd vs1, vs0 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: 
mtfprd f1, r3 -; CHECK-P9-NEXT: xxswapd v5, vs1 ; CHECK-P9-NEXT: xscvspdpn f1, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 +; CHECK-P9-NEXT: mtvsrd v5, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: vmrghb v4, v5, v4 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtfprd f1, r3 +; CHECK-P9-NEXT: mtvsrd v5, r3 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 -; CHECK-P9-NEXT: vmrglb v4, v5, v4 -; CHECK-P9-NEXT: xxswapd v5, vs1 -; CHECK-P9-NEXT: xxswapd v0, vs0 -; CHECK-P9-NEXT: vmrglb v5, v5, v0 +; CHECK-P9-NEXT: mtvsrd v0, r3 +; CHECK-P9-NEXT: vmrghb v5, v5, v0 ; CHECK-P9-NEXT: vmrglh v4, v5, v4 ; CHECK-P9-NEXT: vmrglw v3, v4, v3 ; CHECK-P9-NEXT: xxmrgld v2, v3, v2 diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll index c7d66ae784a0..dbc2774fed8c 100644 --- a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll @@ -16,12 +16,10 @@ define i32 @test2elt(<2 x double> %a) local_unnamed_addr #0 { ; CHECK-P8-NEXT: xscvdpsxws f1, v2 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f1 +; CHECK-P8-NEXT: mtvsrd v2, r3 ; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: mtfprd f1, r4 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: xxswapd v3, vs1 -; CHECK-P8-NEXT: vmrglh v2, v2, v3 +; CHECK-P8-NEXT: mtvsrd v3, r4 +; CHECK-P8-NEXT: vmrghh v2, v2, v3 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mffprwz r3, f0 ; CHECK-P8-NEXT: blr @@ -30,15 +28,13 @@ define i32 @test2elt(<2 x double> %a) local_unnamed_addr #0 { ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: xscvdpsxws f0, v2 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 -; CHECK-P9-NEXT: xxswapd v3, vs0 ; CHECK-P9-NEXT: xxswapd vs0, v2 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz 
r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: li r3, 0 -; CHECK-P9-NEXT: xxswapd v2, vs0 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 +; CHECK-P9-NEXT: vmrghh v2, v3, v2 ; CHECK-P9-NEXT: vextuwrx r3, r3, v2 ; CHECK-P9-NEXT: blr ; @@ -77,18 +73,14 @@ define i64 @test4elt(<4 x double>* nocapture readonly) local_unnamed_addr #1 { ; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: mffprwz r3, f2 ; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: mtfprd f2, r3 -; CHECK-P8-NEXT: mtfprd f3, r4 +; CHECK-P8-NEXT: mtvsrd v2, r3 +; CHECK-P8-NEXT: mtvsrd v3, r4 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: xxswapd v2, vs2 ; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: xxswapd v4, vs3 -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: mtfprd f1, r4 -; CHECK-P8-NEXT: xxswapd v3, vs0 -; CHECK-P8-NEXT: xxswapd v5, vs1 -; CHECK-P8-NEXT: vmrglh v2, v3, v2 -; CHECK-P8-NEXT: vmrglh v3, v5, v4 +; CHECK-P8-NEXT: mtvsrd v4, r3 +; CHECK-P8-NEXT: mtvsrd v5, r4 +; CHECK-P8-NEXT: vmrghh v2, v4, v2 +; CHECK-P8-NEXT: vmrghh v3, v5, v3 ; CHECK-P8-NEXT: vmrglw v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mffprd r3, f0 @@ -102,22 +94,18 @@ define i64 @test4elt(<4 x double>* nocapture readonly) local_unnamed_addr #1 { ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: lxv vs0, 16(r3) ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: xxswapd v2, vs2 -; CHECK-P9-NEXT: mtfprd f1, r3 -; CHECK-P9-NEXT: xxswapd v3, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: vmrghh v2, v2, v3 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtfprd f1, r3 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 -; CHECK-P9-NEXT: vmrglh v2, v2, v3 -; CHECK-P9-NEXT: xxswapd v3, vs1 -; CHECK-P9-NEXT: xxswapd v4, vs0 -; 
CHECK-P9-NEXT: vmrglh v3, v3, v4 +; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: vmrghh v3, v3, v4 ; CHECK-P9-NEXT: vmrglw v2, v3, v2 ; CHECK-P9-NEXT: mfvsrld r3, v2 ; CHECK-P9-NEXT: blr @@ -176,36 +164,28 @@ define <8 x i16> @test8elt(<8 x double>* nocapture readonly) local_unnamed_addr ; CHECK-P8-NEXT: xxswapd vs3, vs3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mffprwz r3, f4 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: mffprwz r3, f4 ; CHECK-P8-NEXT: mffprwz r4, f5 -; CHECK-P8-NEXT: mtfprd f4, r3 +; CHECK-P8-NEXT: mtvsrd v2, r3 ; CHECK-P8-NEXT: mffprwz r3, f6 -; CHECK-P8-NEXT: mtfprd f5, r4 -; CHECK-P8-NEXT: xxswapd v2, vs4 +; CHECK-P8-NEXT: mtvsrd v3, r4 ; CHECK-P8-NEXT: mffprwz r4, f7 -; CHECK-P8-NEXT: mtfprd f6, r3 -; CHECK-P8-NEXT: xxswapd v3, vs5 +; CHECK-P8-NEXT: mtvsrd v4, r3 +; CHECK-P8-NEXT: mtvsrd v5, r4 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mtfprd f7, r4 -; CHECK-P8-NEXT: xxswapd v4, vs6 ; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: xxswapd v1, vs7 +; CHECK-P8-NEXT: mtvsrd v0, r3 +; CHECK-P8-NEXT: mtvsrd v1, r4 ; CHECK-P8-NEXT: mffprwz r3, f2 -; CHECK-P8-NEXT: mtfprd f1, r4 -; CHECK-P8-NEXT: xxswapd v5, vs0 ; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: mtfprd f2, r3 -; CHECK-P8-NEXT: xxswapd v0, vs1 -; CHECK-P8-NEXT: mtfprd f0, r4 -; CHECK-P8-NEXT: xxswapd v6, vs2 -; CHECK-P8-NEXT: vmrglh v2, v5, v2 -; CHECK-P8-NEXT: xxswapd v5, vs0 -; CHECK-P8-NEXT: vmrglh v3, v0, v3 -; CHECK-P8-NEXT: vmrglh v4, v6, v4 -; CHECK-P8-NEXT: vmrglh v5, v5, v1 +; CHECK-P8-NEXT: vmrghh v2, v0, v2 +; CHECK-P8-NEXT: vmrghh v3, v1, v3 +; CHECK-P8-NEXT: mtvsrd v0, r3 +; CHECK-P8-NEXT: mtvsrd v1, r4 +; CHECK-P8-NEXT: vmrghh v4, v0, v4 +; CHECK-P8-NEXT: vmrghh v5, v1, v5 ; CHECK-P8-NEXT: vmrglw v2, v3, v2 ; CHECK-P8-NEXT: vmrglw v3, v5, v4 ; CHECK-P8-NEXT: xxmrgld v2, v3, v2 @@ -217,47 +197,39 @@ define <8 x i16> @test8elt(<8 x double>* 
nocapture readonly) local_unnamed_addr ; CHECK-P9-NEXT: xscvdpsxws f4, f3 ; CHECK-P9-NEXT: xxswapd vs3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: lxv vs2, 16(r3) ; CHECK-P9-NEXT: lxv vs0, 48(r3) ; CHECK-P9-NEXT: lxv vs1, 32(r3) -; CHECK-P9-NEXT: lxv vs2, 16(r3) ; CHECK-P9-NEXT: mffprwz r3, f4 -; CHECK-P9-NEXT: mtfprd f4, r3 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: xxswapd v2, vs4 -; CHECK-P9-NEXT: mtfprd f3, r3 -; CHECK-P9-NEXT: xxswapd v3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs2 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: vmrghh v2, v2, v3 ; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: mtfprd f3, r3 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 -; CHECK-P9-NEXT: xxswapd v4, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: vmrghh v3, v3, v4 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: vmrglh v2, v2, v3 -; CHECK-P9-NEXT: xxswapd v3, vs3 -; CHECK-P9-NEXT: vmrglh v3, v3, v4 ; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: xxswapd v3, vs2 -; CHECK-P9-NEXT: mtfprd f1, r3 -; CHECK-P9-NEXT: xxswapd v4, vs1 +; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: vmrghh v3, v3, v4 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtfprd f1, r3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 -; CHECK-P9-NEXT: vmrglh v3, v3, v4 -; CHECK-P9-NEXT: xxswapd v4, vs1 -; CHECK-P9-NEXT: xxswapd v5, vs0 -; CHECK-P9-NEXT: vmrglh v4, v4, v5 +; CHECK-P9-NEXT: mtvsrd v5, r3 +; CHECK-P9-NEXT: vmrghh v4, v4, v5 ; 
CHECK-P9-NEXT: vmrglw v3, v4, v3 ; CHECK-P9-NEXT: xxmrgld v2, v3, v2 ; CHECK-P9-NEXT: blr @@ -321,209 +293,177 @@ entry: define void @test16elt(<16 x i16>* noalias nocapture sret %agg.result, <16 x double>* nocapture readonly) local_unnamed_addr #3 { ; CHECK-P8-LABEL: test16elt: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 ; CHECK-P8-NEXT: li r5, 16 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 ; CHECK-P8-NEXT: li r6, 32 +; CHECK-P8-NEXT: li r7, 48 ; CHECK-P8-NEXT: lxvd2x vs1, r4, r5 ; CHECK-P8-NEXT: lxvd2x vs2, r4, r6 -; CHECK-P8-NEXT: li r6, 48 -; CHECK-P8-NEXT: lxvd2x vs3, r4, r6 ; CHECK-P8-NEXT: li r6, 64 -; CHECK-P8-NEXT: xscvdpsxws f4, f0 +; CHECK-P8-NEXT: lxvd2x vs3, r4, r7 ; CHECK-P8-NEXT: lxvd2x vs5, r4, r6 -; CHECK-P8-NEXT: li r6, 80 +; CHECK-P8-NEXT: li r7, 80 +; CHECK-P8-NEXT: li r6, 96 +; CHECK-P8-NEXT: xscvdpsxws f4, f0 +; CHECK-P8-NEXT: lxvd2x vs7, r4, r7 +; CHECK-P8-NEXT: lxvd2x vs10, r4, r6 +; CHECK-P8-NEXT: li r6, 112 ; CHECK-P8-NEXT: xxswapd vs0, vs0 ; CHECK-P8-NEXT: xscvdpsxws f6, f1 -; CHECK-P8-NEXT: lxvd2x vs7, r4, r6 -; CHECK-P8-NEXT: li r6, 96 ; CHECK-P8-NEXT: xxswapd vs1, vs1 ; CHECK-P8-NEXT: xscvdpsxws f8, f2 -; CHECK-P8-NEXT: lxvd2x vs9, r4, r6 -; CHECK-P8-NEXT: li r6, 112 ; CHECK-P8-NEXT: xxswapd vs2, vs2 -; CHECK-P8-NEXT: xscvdpsxws f10, f3 -; CHECK-P8-NEXT: lxvd2x vs11, r4, r6 +; CHECK-P8-NEXT: xscvdpsxws f9, f3 ; CHECK-P8-NEXT: xxswapd vs3, vs3 -; CHECK-P8-NEXT: xscvdpsxws f12, f5 +; CHECK-P8-NEXT: xscvdpsxws f11, f5 ; CHECK-P8-NEXT: xxswapd vs5, vs5 -; CHECK-P8-NEXT: xscvdpsxws f13, f7 +; CHECK-P8-NEXT: xscvdpsxws f12, f7 ; CHECK-P8-NEXT: xxswapd vs7, vs7 -; CHECK-P8-NEXT: xscvdpsxws v2, f9 -; CHECK-P8-NEXT: xxswapd vs9, vs9 -; CHECK-P8-NEXT: mffprwz r4, f4 -; CHECK-P8-NEXT: xscvdpsxws v3, f11 -; CHECK-P8-NEXT: xxswapd vs11, vs11 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: mffprwz r6, f6 -; CHECK-P8-NEXT: mtfprd f4, r4 +; CHECK-P8-NEXT: mffprwz r7, f4 +; CHECK-P8-NEXT: lxvd2x vs4, r4, r6 +; CHECK-P8-NEXT: mffprwz 
r4, f6 +; CHECK-P8-NEXT: xscvdpsxws f13, f10 +; CHECK-P8-NEXT: mtvsrd v3, r4 ; CHECK-P8-NEXT: mffprwz r4, f8 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: xxswapd v4, vs4 -; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: mtfprd f6, r6 -; CHECK-P8-NEXT: mffprwz r6, f10 -; CHECK-P8-NEXT: mtfprd f8, r4 -; CHECK-P8-NEXT: xxswapd v5, vs6 -; CHECK-P8-NEXT: mffprwz r4, f12 -; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: xxswapd v0, vs8 -; CHECK-P8-NEXT: mtfprd f10, r6 -; CHECK-P8-NEXT: mffprwz r6, f13 -; CHECK-P8-NEXT: mtfprd f12, r4 -; CHECK-P8-NEXT: xxswapd v1, vs10 -; CHECK-P8-NEXT: mfvsrwz r4, v2 -; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: xxswapd v6, vs12 -; CHECK-P8-NEXT: xscvdpsxws f9, f9 -; CHECK-P8-NEXT: mtfprd f13, r6 -; CHECK-P8-NEXT: mfvsrwz r6, v3 -; CHECK-P8-NEXT: mtvsrd v2, r4 -; CHECK-P8-NEXT: xxswapd v7, vs13 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: xscvdpsxws f7, f7 -; CHECK-P8-NEXT: xxswapd v2, v2 -; CHECK-P8-NEXT: xscvdpsxws f11, f11 -; CHECK-P8-NEXT: mtvsrd v3, r6 -; CHECK-P8-NEXT: mffprwz r6, f1 -; CHECK-P8-NEXT: mtfprd f0, r4 -; CHECK-P8-NEXT: xxswapd v3, v3 -; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: mtfprd f1, r6 -; CHECK-P8-NEXT: xxswapd v8, vs0 -; CHECK-P8-NEXT: mtfprd f2, r4 -; CHECK-P8-NEXT: mffprwz r4, f5 -; CHECK-P8-NEXT: xxswapd v9, vs1 -; CHECK-P8-NEXT: mffprwz r6, f3 -; CHECK-P8-NEXT: xxswapd v10, vs2 -; CHECK-P8-NEXT: mtfprd f5, r4 +; CHECK-P8-NEXT: xscvdpsxws f6, f4 +; CHECK-P8-NEXT: mtvsrd v4, r4 ; CHECK-P8-NEXT: mffprwz r4, f9 -; CHECK-P8-NEXT: mtfprd f3, r6 -; CHECK-P8-NEXT: mffprwz r6, f7 -; CHECK-P8-NEXT: mtfprd f9, r4 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: mtvsrd v5, r4 ; CHECK-P8-NEXT: mffprwz r4, f11 -; CHECK-P8-NEXT: vmrglh v4, v8, v4 -; CHECK-P8-NEXT: xxswapd v8, vs3 -; CHECK-P8-NEXT: vmrglh v5, v9, v5 -; CHECK-P8-NEXT: xxswapd v9, vs5 -; CHECK-P8-NEXT: mtfprd f7, r6 -; CHECK-P8-NEXT: mtfprd f0, r4 -; CHECK-P8-NEXT: vmrglh v0, v10, v0 -; CHECK-P8-NEXT: xxswapd v10, 
vs7 -; CHECK-P8-NEXT: vmrglh v1, v8, v1 -; CHECK-P8-NEXT: xxswapd v8, vs9 -; CHECK-P8-NEXT: vmrglh v6, v9, v6 -; CHECK-P8-NEXT: xxswapd v9, vs0 -; CHECK-P8-NEXT: vmrglh v7, v10, v7 -; CHECK-P8-NEXT: vmrglh v2, v8, v2 -; CHECK-P8-NEXT: vmrglh v3, v9, v3 -; CHECK-P8-NEXT: vmrglw v4, v5, v4 -; CHECK-P8-NEXT: vmrglw v5, v1, v0 -; CHECK-P8-NEXT: vmrglw v0, v7, v6 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: mtvsrd v0, r4 +; CHECK-P8-NEXT: mffprwz r4, f12 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: mtvsrd v1, r4 +; CHECK-P8-NEXT: mffprwz r4, f13 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: mtvsrd v6, r4 +; CHECK-P8-NEXT: mffprwz r4, f6 +; CHECK-P8-NEXT: xxswapd vs6, vs10 +; CHECK-P8-NEXT: xscvdpsxws f5, f5 +; CHECK-P8-NEXT: mtvsrd v7, r4 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: xxswapd vs0, vs4 +; CHECK-P8-NEXT: mtvsrd v2, r7 +; CHECK-P8-NEXT: mtvsrd v8, r4 +; CHECK-P8-NEXT: mffprwz r4, f1 +; CHECK-P8-NEXT: xscvdpsxws f7, f7 +; CHECK-P8-NEXT: mtvsrd v9, r4 +; CHECK-P8-NEXT: mffprwz r4, f2 +; CHECK-P8-NEXT: xscvdpsxws f4, f6 +; CHECK-P8-NEXT: vmrghh v2, v8, v2 +; CHECK-P8-NEXT: mtvsrd v8, r4 +; CHECK-P8-NEXT: mffprwz r4, f3 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: vmrghh v3, v9, v3 +; CHECK-P8-NEXT: mtvsrd v9, r4 +; CHECK-P8-NEXT: mffprwz r4, f5 +; CHECK-P8-NEXT: vmrghh v4, v8, v4 +; CHECK-P8-NEXT: mtvsrd v8, r4 +; CHECK-P8-NEXT: mffprwz r4, f7 +; CHECK-P8-NEXT: vmrghh v5, v9, v5 +; CHECK-P8-NEXT: mtvsrd v9, r4 +; CHECK-P8-NEXT: mffprwz r4, f4 +; CHECK-P8-NEXT: vmrghh v0, v8, v0 +; CHECK-P8-NEXT: mtvsrd v8, r4 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: vmrghh v1, v9, v1 +; CHECK-P8-NEXT: mtvsrd v9, r4 +; CHECK-P8-NEXT: vmrghh v6, v8, v6 +; CHECK-P8-NEXT: vmrghh v7, v9, v7 ; CHECK-P8-NEXT: vmrglw v2, v3, v2 +; CHECK-P8-NEXT: vmrglw v3, v5, v4 +; CHECK-P8-NEXT: vmrglw v4, v1, v0 +; CHECK-P8-NEXT: vmrglw v5, v7, v6 +; CHECK-P8-NEXT: xxmrgld v2, v3, v2 +; CHECK-P8-NEXT: stvx v2, 0, r3 ; CHECK-P8-NEXT: xxmrgld v3, v5, 
v4 -; CHECK-P8-NEXT: stvx v3, 0, r3 -; CHECK-P8-NEXT: xxmrgld v2, v2, v0 -; CHECK-P8-NEXT: stvx v2, r3, r5 +; CHECK-P8-NEXT: stvx v3, r3, r5 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test16elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs4, 0(r4) -; CHECK-P9-NEXT: lxv vs3, 16(r4) -; CHECK-P9-NEXT: lxv vs2, 32(r4) -; CHECK-P9-NEXT: xscvdpsxws f5, f4 -; CHECK-P9-NEXT: lxv vs1, 48(r4) -; CHECK-P9-NEXT: xscvdpsxws f6, f3 -; CHECK-P9-NEXT: lxv vs0, 64(r4) -; CHECK-P9-NEXT: xscvdpsxws f7, f2 -; CHECK-P9-NEXT: xscvdpsxws f8, f1 -; CHECK-P9-NEXT: xxswapd vs4, vs4 -; CHECK-P9-NEXT: xscvdpsxws f4, f4 -; CHECK-P9-NEXT: mffprwz r5, f5 -; CHECK-P9-NEXT: xscvdpsxws f9, f0 +; CHECK-P9-NEXT: lxv vs3, 0(r4) +; CHECK-P9-NEXT: lxv vs2, 16(r4) +; CHECK-P9-NEXT: lxv vs1, 32(r4) +; CHECK-P9-NEXT: xscvdpsxws f4, f3 +; CHECK-P9-NEXT: lxv vs0, 48(r4) +; CHECK-P9-NEXT: xscvdpsxws f5, f2 +; CHECK-P9-NEXT: xscvdpsxws f6, f1 ; CHECK-P9-NEXT: xxswapd vs3, vs3 -; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: mtfprd f5, r5 -; CHECK-P9-NEXT: mffprwz r5, f6 -; CHECK-P9-NEXT: xxswapd vs2, vs2 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mtfprd f6, r5 -; CHECK-P9-NEXT: mffprwz r5, f7 -; CHECK-P9-NEXT: mtfprd f7, r5 -; CHECK-P9-NEXT: mffprwz r5, f8 -; CHECK-P9-NEXT: mtfprd f8, r5 -; CHECK-P9-NEXT: mffprwz r5, f9 -; CHECK-P9-NEXT: mtfprd f9, r5 -; CHECK-P9-NEXT: mffprwz r5, f4 -; CHECK-P9-NEXT: mtfprd f4, r5 -; CHECK-P9-NEXT: mffprwz r5, f3 -; CHECK-P9-NEXT: xxswapd vs1, vs1 -; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: xxswapd v2, vs5 -; CHECK-P9-NEXT: xxswapd v5, vs8 -; CHECK-P9-NEXT: xxswapd v0, vs9 -; CHECK-P9-NEXT: mtfprd f3, r5 -; CHECK-P9-NEXT: mffprwz r5, f2 -; CHECK-P9-NEXT: mtfprd f2, r5 +; CHECK-P9-NEXT: xscvdpsxws f7, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: xxswapd v1, vs2 +; CHECK-P9-NEXT: mffprwz r5, f4 +; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: xxswapd vs2, vs2 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 
+; CHECK-P9-NEXT: mtvsrd v2, r5 +; CHECK-P9-NEXT: mffprwz r5, f5 +; CHECK-P9-NEXT: mtvsrd v3, r5 +; CHECK-P9-NEXT: mffprwz r5, f6 +; CHECK-P9-NEXT: mtvsrd v4, r5 +; CHECK-P9-NEXT: mffprwz r5, f7 +; CHECK-P9-NEXT: mtvsrd v5, r5 +; CHECK-P9-NEXT: mffprwz r5, f3 +; CHECK-P9-NEXT: lxv vs3, 64(r4) +; CHECK-P9-NEXT: xxswapd vs1, vs1 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: mtvsrd v0, r5 +; CHECK-P9-NEXT: mffprwz r5, f2 ; CHECK-P9-NEXT: lxv vs2, 80(r4) -; CHECK-P9-NEXT: xxswapd v3, vs4 -; CHECK-P9-NEXT: vmrglh v2, v2, v3 -; CHECK-P9-NEXT: xxswapd v3, vs6 -; CHECK-P9-NEXT: xxswapd v4, vs3 +; CHECK-P9-NEXT: vmrghh v2, v2, v0 +; CHECK-P9-NEXT: mtvsrd v0, r5 +; CHECK-P9-NEXT: mffprwz r5, f1 +; CHECK-P9-NEXT: lxv vs1, 96(r4) +; CHECK-P9-NEXT: xscvdpsxws f4, f3 +; CHECK-P9-NEXT: xxswapd vs3, vs3 +; CHECK-P9-NEXT: vmrghh v3, v3, v0 +; CHECK-P9-NEXT: mtvsrd v0, r5 +; CHECK-P9-NEXT: mffprwz r5, f0 +; CHECK-P9-NEXT: lxv vs0, 112(r4) +; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: vmrghh v4, v4, v0 +; CHECK-P9-NEXT: mtvsrd v0, r5 +; CHECK-P9-NEXT: vmrglw v2, v3, v2 +; CHECK-P9-NEXT: vmrghh v5, v5, v0 +; CHECK-P9-NEXT: mffprwz r4, f4 +; CHECK-P9-NEXT: vmrglw v4, v5, v4 +; CHECK-P9-NEXT: mtvsrd v3, r4 +; CHECK-P9-NEXT: mffprwz r4, f3 ; CHECK-P9-NEXT: xscvdpsxws f3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs2 -; CHECK-P9-NEXT: mffprwz r5, f1 -; CHECK-P9-NEXT: vmrglh v3, v3, v4 -; CHECK-P9-NEXT: xxswapd v4, vs7 -; CHECK-P9-NEXT: mtfprd f1, r5 -; CHECK-P9-NEXT: mffprwz r5, f0 -; CHECK-P9-NEXT: vmrglh v4, v4, v1 -; CHECK-P9-NEXT: xxswapd v1, vs1 -; CHECK-P9-NEXT: mtfprd f0, r5 -; CHECK-P9-NEXT: vmrglh v5, v5, v1 +; CHECK-P9-NEXT: xxmrgld vs4, v4, v2 +; CHECK-P9-NEXT: mtvsrd v2, r4 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: xxswapd v1, vs0 -; CHECK-P9-NEXT: lxv vs0, 112(r4) -; CHECK-P9-NEXT: lxv vs1, 96(r4) +; CHECK-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-P9-NEXT: stxv vs4, 0(r3) ; CHECK-P9-NEXT: mffprwz r4, f3 -; CHECK-P9-NEXT: mtfprd f3, r4 +; CHECK-P9-NEXT: mtvsrd v3, 
r4 ; CHECK-P9-NEXT: mffprwz r4, f2 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: vmrglw v3, v5, v4 -; CHECK-P9-NEXT: xxmrgld vs4, v3, v2 -; CHECK-P9-NEXT: xxswapd v2, vs3 -; CHECK-P9-NEXT: vmrglh v0, v0, v1 -; CHECK-P9-NEXT: mtfprd f2, r4 -; CHECK-P9-NEXT: xxswapd v3, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 +; CHECK-P9-NEXT: mtvsrd v4, r4 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: vmrghh v3, v3, v4 ; CHECK-P9-NEXT: mffprwz r4, f2 -; CHECK-P9-NEXT: mtfprd f2, r4 +; CHECK-P9-NEXT: vmrglw v2, v3, v2 +; CHECK-P9-NEXT: mtvsrd v3, r4 ; CHECK-P9-NEXT: mffprwz r4, f1 -; CHECK-P9-NEXT: mtfprd f1, r4 -; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 +; CHECK-P9-NEXT: mtvsrd v4, r4 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: vmrghh v3, v3, v4 ; CHECK-P9-NEXT: mffprwz r4, f1 -; CHECK-P9-NEXT: mtfprd f1, r4 +; CHECK-P9-NEXT: mtvsrd v4, r4 ; CHECK-P9-NEXT: mffprwz r4, f0 -; CHECK-P9-NEXT: vmrglh v2, v2, v3 -; CHECK-P9-NEXT: xxswapd v3, vs2 -; CHECK-P9-NEXT: vmrglh v3, v3, v4 -; CHECK-P9-NEXT: xxswapd v4, vs1 -; CHECK-P9-NEXT: vmrglw v2, v2, v0 -; CHECK-P9-NEXT: mtfprd f0, r4 -; CHECK-P9-NEXT: xxswapd v5, vs0 -; CHECK-P9-NEXT: vmrglh v4, v4, v5 +; CHECK-P9-NEXT: mtvsrd v5, r4 +; CHECK-P9-NEXT: vmrghh v4, v4, v5 ; CHECK-P9-NEXT: vmrglw v3, v4, v3 ; CHECK-P9-NEXT: xxmrgld vs0, v3, v2 ; CHECK-P9-NEXT: stxv vs0, 16(r3) -; CHECK-P9-NEXT: stxv vs4, 0(r3) ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test16elt: @@ -639,12 +579,10 @@ define i32 @test2elt_signed(<2 x double> %a) local_unnamed_addr #0 { ; CHECK-P8-NEXT: xscvdpsxws f1, v2 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f1 +; CHECK-P8-NEXT: mtvsrd v2, r3 ; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: mtfprd f1, r4 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: xxswapd v3, vs1 -; CHECK-P8-NEXT: vmrglh v2, v2, v3 +; CHECK-P8-NEXT: mtvsrd v3, r4 +; CHECK-P8-NEXT: 
vmrghh v2, v2, v3 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mffprwz r3, f0 ; CHECK-P8-NEXT: blr @@ -653,15 +591,13 @@ define i32 @test2elt_signed(<2 x double> %a) local_unnamed_addr #0 { ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: xscvdpsxws f0, v2 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 -; CHECK-P9-NEXT: xxswapd v3, vs0 ; CHECK-P9-NEXT: xxswapd vs0, v2 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: li r3, 0 -; CHECK-P9-NEXT: xxswapd v2, vs0 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 +; CHECK-P9-NEXT: vmrghh v2, v3, v2 ; CHECK-P9-NEXT: vextuwrx r3, r3, v2 ; CHECK-P9-NEXT: blr ; @@ -700,18 +636,14 @@ define i64 @test4elt_signed(<4 x double>* nocapture readonly) local_unnamed_addr ; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: mffprwz r3, f2 ; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: mtfprd f2, r3 -; CHECK-P8-NEXT: mtfprd f3, r4 +; CHECK-P8-NEXT: mtvsrd v2, r3 +; CHECK-P8-NEXT: mtvsrd v3, r4 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: xxswapd v2, vs2 ; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: xxswapd v4, vs3 -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: mtfprd f1, r4 -; CHECK-P8-NEXT: xxswapd v3, vs0 -; CHECK-P8-NEXT: xxswapd v5, vs1 -; CHECK-P8-NEXT: vmrglh v2, v3, v2 -; CHECK-P8-NEXT: vmrglh v3, v5, v4 +; CHECK-P8-NEXT: mtvsrd v4, r3 +; CHECK-P8-NEXT: mtvsrd v5, r4 +; CHECK-P8-NEXT: vmrghh v2, v4, v2 +; CHECK-P8-NEXT: vmrghh v3, v5, v3 ; CHECK-P8-NEXT: vmrglw v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mffprd r3, f0 @@ -725,22 +657,18 @@ define i64 @test4elt_signed(<4 x double>* nocapture readonly) local_unnamed_addr ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: lxv vs0, 16(r3) ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: xxswapd v2, vs2 -; CHECK-P9-NEXT: 
mtfprd f1, r3 -; CHECK-P9-NEXT: xxswapd v3, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: vmrghh v2, v2, v3 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtfprd f1, r3 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 -; CHECK-P9-NEXT: vmrglh v2, v2, v3 -; CHECK-P9-NEXT: xxswapd v3, vs1 -; CHECK-P9-NEXT: xxswapd v4, vs0 -; CHECK-P9-NEXT: vmrglh v3, v3, v4 +; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: vmrghh v3, v3, v4 ; CHECK-P9-NEXT: vmrglw v2, v3, v2 ; CHECK-P9-NEXT: mfvsrld r3, v2 ; CHECK-P9-NEXT: blr @@ -799,36 +727,28 @@ define <8 x i16> @test8elt_signed(<8 x double>* nocapture readonly) local_unname ; CHECK-P8-NEXT: xxswapd vs3, vs3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mffprwz r3, f4 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: mffprwz r3, f4 ; CHECK-P8-NEXT: mffprwz r4, f5 -; CHECK-P8-NEXT: mtfprd f4, r3 +; CHECK-P8-NEXT: mtvsrd v2, r3 ; CHECK-P8-NEXT: mffprwz r3, f6 -; CHECK-P8-NEXT: mtfprd f5, r4 -; CHECK-P8-NEXT: xxswapd v2, vs4 +; CHECK-P8-NEXT: mtvsrd v3, r4 ; CHECK-P8-NEXT: mffprwz r4, f7 -; CHECK-P8-NEXT: mtfprd f6, r3 -; CHECK-P8-NEXT: xxswapd v3, vs5 +; CHECK-P8-NEXT: mtvsrd v4, r3 +; CHECK-P8-NEXT: mtvsrd v5, r4 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mtfprd f7, r4 -; CHECK-P8-NEXT: xxswapd v4, vs6 ; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: xxswapd v1, vs7 +; CHECK-P8-NEXT: mtvsrd v0, r3 +; CHECK-P8-NEXT: mtvsrd v1, r4 ; CHECK-P8-NEXT: mffprwz r3, f2 -; CHECK-P8-NEXT: mtfprd f1, r4 -; CHECK-P8-NEXT: xxswapd v5, vs0 ; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: mtfprd f2, r3 -; CHECK-P8-NEXT: xxswapd v0, vs1 -; CHECK-P8-NEXT: mtfprd f0, r4 -; CHECK-P8-NEXT: xxswapd v6, vs2 -; CHECK-P8-NEXT: vmrglh v2, v5, v2 -; CHECK-P8-NEXT: xxswapd v5, vs0 
-; CHECK-P8-NEXT: vmrglh v3, v0, v3 -; CHECK-P8-NEXT: vmrglh v4, v6, v4 -; CHECK-P8-NEXT: vmrglh v5, v5, v1 +; CHECK-P8-NEXT: vmrghh v2, v0, v2 +; CHECK-P8-NEXT: vmrghh v3, v1, v3 +; CHECK-P8-NEXT: mtvsrd v0, r3 +; CHECK-P8-NEXT: mtvsrd v1, r4 +; CHECK-P8-NEXT: vmrghh v4, v0, v4 +; CHECK-P8-NEXT: vmrghh v5, v1, v5 ; CHECK-P8-NEXT: vmrglw v2, v3, v2 ; CHECK-P8-NEXT: vmrglw v3, v5, v4 ; CHECK-P8-NEXT: xxmrgld v2, v3, v2 @@ -840,47 +760,39 @@ define <8 x i16> @test8elt_signed(<8 x double>* nocapture readonly) local_unname ; CHECK-P9-NEXT: xscvdpsxws f4, f3 ; CHECK-P9-NEXT: xxswapd vs3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: lxv vs2, 16(r3) ; CHECK-P9-NEXT: lxv vs0, 48(r3) ; CHECK-P9-NEXT: lxv vs1, 32(r3) -; CHECK-P9-NEXT: lxv vs2, 16(r3) ; CHECK-P9-NEXT: mffprwz r3, f4 -; CHECK-P9-NEXT: mtfprd f4, r3 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: xxswapd v2, vs4 -; CHECK-P9-NEXT: mtfprd f3, r3 -; CHECK-P9-NEXT: xxswapd v3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs2 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: vmrghh v2, v2, v3 ; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: mtfprd f3, r3 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 -; CHECK-P9-NEXT: xxswapd v4, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: vmrghh v3, v3, v4 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: vmrglh v2, v2, v3 -; CHECK-P9-NEXT: xxswapd v3, vs3 -; CHECK-P9-NEXT: vmrglh v3, v3, v4 ; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: xxswapd v3, vs2 -; CHECK-P9-NEXT: mtfprd f1, r3 -; CHECK-P9-NEXT: xxswapd v4, vs1 +; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, 
vs0 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: vmrghh v3, v3, v4 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtfprd f1, r3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 -; CHECK-P9-NEXT: vmrglh v3, v3, v4 -; CHECK-P9-NEXT: xxswapd v4, vs1 -; CHECK-P9-NEXT: xxswapd v5, vs0 -; CHECK-P9-NEXT: vmrglh v4, v4, v5 +; CHECK-P9-NEXT: mtvsrd v5, r3 +; CHECK-P9-NEXT: vmrghh v4, v4, v5 ; CHECK-P9-NEXT: vmrglw v3, v4, v3 ; CHECK-P9-NEXT: xxmrgld v2, v3, v2 ; CHECK-P9-NEXT: blr @@ -944,209 +856,177 @@ entry: define void @test16elt_signed(<16 x i16>* noalias nocapture sret %agg.result, <16 x double>* nocapture readonly) local_unnamed_addr #3 { ; CHECK-P8-LABEL: test16elt_signed: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 ; CHECK-P8-NEXT: li r5, 16 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 ; CHECK-P8-NEXT: li r6, 32 +; CHECK-P8-NEXT: li r7, 48 ; CHECK-P8-NEXT: lxvd2x vs1, r4, r5 ; CHECK-P8-NEXT: lxvd2x vs2, r4, r6 -; CHECK-P8-NEXT: li r6, 48 -; CHECK-P8-NEXT: lxvd2x vs3, r4, r6 ; CHECK-P8-NEXT: li r6, 64 -; CHECK-P8-NEXT: xscvdpsxws f4, f0 +; CHECK-P8-NEXT: lxvd2x vs3, r4, r7 ; CHECK-P8-NEXT: lxvd2x vs5, r4, r6 -; CHECK-P8-NEXT: li r6, 80 +; CHECK-P8-NEXT: li r7, 80 +; CHECK-P8-NEXT: li r6, 96 +; CHECK-P8-NEXT: xscvdpsxws f4, f0 +; CHECK-P8-NEXT: lxvd2x vs7, r4, r7 +; CHECK-P8-NEXT: lxvd2x vs10, r4, r6 +; CHECK-P8-NEXT: li r6, 112 ; CHECK-P8-NEXT: xxswapd vs0, vs0 ; CHECK-P8-NEXT: xscvdpsxws f6, f1 -; CHECK-P8-NEXT: lxvd2x vs7, r4, r6 -; CHECK-P8-NEXT: li r6, 96 ; CHECK-P8-NEXT: xxswapd vs1, vs1 ; CHECK-P8-NEXT: xscvdpsxws f8, f2 -; CHECK-P8-NEXT: lxvd2x vs9, r4, r6 -; CHECK-P8-NEXT: li r6, 112 ; CHECK-P8-NEXT: xxswapd vs2, vs2 -; CHECK-P8-NEXT: xscvdpsxws f10, f3 -; CHECK-P8-NEXT: lxvd2x vs11, r4, r6 +; CHECK-P8-NEXT: xscvdpsxws f9, f3 ; CHECK-P8-NEXT: xxswapd vs3, vs3 -; CHECK-P8-NEXT: xscvdpsxws f12, f5 +; CHECK-P8-NEXT: xscvdpsxws f11, f5 ; CHECK-P8-NEXT: xxswapd 
vs5, vs5 -; CHECK-P8-NEXT: xscvdpsxws f13, f7 +; CHECK-P8-NEXT: xscvdpsxws f12, f7 ; CHECK-P8-NEXT: xxswapd vs7, vs7 -; CHECK-P8-NEXT: xscvdpsxws v2, f9 -; CHECK-P8-NEXT: xxswapd vs9, vs9 -; CHECK-P8-NEXT: mffprwz r4, f4 -; CHECK-P8-NEXT: xscvdpsxws v3, f11 -; CHECK-P8-NEXT: xxswapd vs11, vs11 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: mffprwz r6, f6 -; CHECK-P8-NEXT: mtfprd f4, r4 +; CHECK-P8-NEXT: mffprwz r7, f4 +; CHECK-P8-NEXT: lxvd2x vs4, r4, r6 +; CHECK-P8-NEXT: mffprwz r4, f6 +; CHECK-P8-NEXT: xscvdpsxws f13, f10 +; CHECK-P8-NEXT: mtvsrd v3, r4 ; CHECK-P8-NEXT: mffprwz r4, f8 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: xxswapd v4, vs4 -; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: mtfprd f6, r6 -; CHECK-P8-NEXT: mffprwz r6, f10 -; CHECK-P8-NEXT: mtfprd f8, r4 -; CHECK-P8-NEXT: xxswapd v5, vs6 -; CHECK-P8-NEXT: mffprwz r4, f12 -; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: xxswapd v0, vs8 -; CHECK-P8-NEXT: mtfprd f10, r6 -; CHECK-P8-NEXT: mffprwz r6, f13 -; CHECK-P8-NEXT: mtfprd f12, r4 -; CHECK-P8-NEXT: xxswapd v1, vs10 -; CHECK-P8-NEXT: mfvsrwz r4, v2 -; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: xxswapd v6, vs12 -; CHECK-P8-NEXT: xscvdpsxws f9, f9 -; CHECK-P8-NEXT: mtfprd f13, r6 -; CHECK-P8-NEXT: mfvsrwz r6, v3 -; CHECK-P8-NEXT: mtvsrd v2, r4 -; CHECK-P8-NEXT: xxswapd v7, vs13 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: xscvdpsxws f7, f7 -; CHECK-P8-NEXT: xxswapd v2, v2 -; CHECK-P8-NEXT: xscvdpsxws f11, f11 -; CHECK-P8-NEXT: mtvsrd v3, r6 -; CHECK-P8-NEXT: mffprwz r6, f1 -; CHECK-P8-NEXT: mtfprd f0, r4 -; CHECK-P8-NEXT: xxswapd v3, v3 -; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: mtfprd f1, r6 -; CHECK-P8-NEXT: xxswapd v8, vs0 -; CHECK-P8-NEXT: mtfprd f2, r4 -; CHECK-P8-NEXT: mffprwz r4, f5 -; CHECK-P8-NEXT: xxswapd v9, vs1 -; CHECK-P8-NEXT: mffprwz r6, f3 -; CHECK-P8-NEXT: xxswapd v10, vs2 -; CHECK-P8-NEXT: mtfprd f5, r4 +; CHECK-P8-NEXT: xscvdpsxws f6, f4 +; CHECK-P8-NEXT: mtvsrd v4, r4 ; 
CHECK-P8-NEXT: mffprwz r4, f9 -; CHECK-P8-NEXT: mtfprd f3, r6 -; CHECK-P8-NEXT: mffprwz r6, f7 -; CHECK-P8-NEXT: mtfprd f9, r4 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: mtvsrd v5, r4 ; CHECK-P8-NEXT: mffprwz r4, f11 -; CHECK-P8-NEXT: vmrglh v4, v8, v4 -; CHECK-P8-NEXT: xxswapd v8, vs3 -; CHECK-P8-NEXT: vmrglh v5, v9, v5 -; CHECK-P8-NEXT: xxswapd v9, vs5 -; CHECK-P8-NEXT: mtfprd f7, r6 -; CHECK-P8-NEXT: mtfprd f0, r4 -; CHECK-P8-NEXT: vmrglh v0, v10, v0 -; CHECK-P8-NEXT: xxswapd v10, vs7 -; CHECK-P8-NEXT: vmrglh v1, v8, v1 -; CHECK-P8-NEXT: xxswapd v8, vs9 -; CHECK-P8-NEXT: vmrglh v6, v9, v6 -; CHECK-P8-NEXT: xxswapd v9, vs0 -; CHECK-P8-NEXT: vmrglh v7, v10, v7 -; CHECK-P8-NEXT: vmrglh v2, v8, v2 -; CHECK-P8-NEXT: vmrglh v3, v9, v3 -; CHECK-P8-NEXT: vmrglw v4, v5, v4 -; CHECK-P8-NEXT: vmrglw v5, v1, v0 -; CHECK-P8-NEXT: vmrglw v0, v7, v6 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: mtvsrd v0, r4 +; CHECK-P8-NEXT: mffprwz r4, f12 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: mtvsrd v1, r4 +; CHECK-P8-NEXT: mffprwz r4, f13 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: mtvsrd v6, r4 +; CHECK-P8-NEXT: mffprwz r4, f6 +; CHECK-P8-NEXT: xxswapd vs6, vs10 +; CHECK-P8-NEXT: xscvdpsxws f5, f5 +; CHECK-P8-NEXT: mtvsrd v7, r4 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: xxswapd vs0, vs4 +; CHECK-P8-NEXT: mtvsrd v2, r7 +; CHECK-P8-NEXT: mtvsrd v8, r4 +; CHECK-P8-NEXT: mffprwz r4, f1 +; CHECK-P8-NEXT: xscvdpsxws f7, f7 +; CHECK-P8-NEXT: mtvsrd v9, r4 +; CHECK-P8-NEXT: mffprwz r4, f2 +; CHECK-P8-NEXT: xscvdpsxws f4, f6 +; CHECK-P8-NEXT: vmrghh v2, v8, v2 +; CHECK-P8-NEXT: mtvsrd v8, r4 +; CHECK-P8-NEXT: mffprwz r4, f3 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: vmrghh v3, v9, v3 +; CHECK-P8-NEXT: mtvsrd v9, r4 +; CHECK-P8-NEXT: mffprwz r4, f5 +; CHECK-P8-NEXT: vmrghh v4, v8, v4 +; CHECK-P8-NEXT: mtvsrd v8, r4 +; CHECK-P8-NEXT: mffprwz r4, f7 +; CHECK-P8-NEXT: vmrghh v5, v9, v5 +; CHECK-P8-NEXT: mtvsrd v9, r4 +; CHECK-P8-NEXT: 
mffprwz r4, f4 +; CHECK-P8-NEXT: vmrghh v0, v8, v0 +; CHECK-P8-NEXT: mtvsrd v8, r4 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: vmrghh v1, v9, v1 +; CHECK-P8-NEXT: mtvsrd v9, r4 +; CHECK-P8-NEXT: vmrghh v6, v8, v6 +; CHECK-P8-NEXT: vmrghh v7, v9, v7 ; CHECK-P8-NEXT: vmrglw v2, v3, v2 +; CHECK-P8-NEXT: vmrglw v3, v5, v4 +; CHECK-P8-NEXT: vmrglw v4, v1, v0 +; CHECK-P8-NEXT: vmrglw v5, v7, v6 +; CHECK-P8-NEXT: xxmrgld v2, v3, v2 +; CHECK-P8-NEXT: stvx v2, 0, r3 ; CHECK-P8-NEXT: xxmrgld v3, v5, v4 -; CHECK-P8-NEXT: stvx v3, 0, r3 -; CHECK-P8-NEXT: xxmrgld v2, v2, v0 -; CHECK-P8-NEXT: stvx v2, r3, r5 +; CHECK-P8-NEXT: stvx v3, r3, r5 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test16elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs4, 0(r4) -; CHECK-P9-NEXT: lxv vs3, 16(r4) -; CHECK-P9-NEXT: lxv vs2, 32(r4) -; CHECK-P9-NEXT: xscvdpsxws f5, f4 -; CHECK-P9-NEXT: lxv vs1, 48(r4) -; CHECK-P9-NEXT: xscvdpsxws f6, f3 -; CHECK-P9-NEXT: lxv vs0, 64(r4) -; CHECK-P9-NEXT: xscvdpsxws f7, f2 -; CHECK-P9-NEXT: xscvdpsxws f8, f1 -; CHECK-P9-NEXT: xxswapd vs4, vs4 -; CHECK-P9-NEXT: xscvdpsxws f4, f4 -; CHECK-P9-NEXT: mffprwz r5, f5 -; CHECK-P9-NEXT: xscvdpsxws f9, f0 +; CHECK-P9-NEXT: lxv vs3, 0(r4) +; CHECK-P9-NEXT: lxv vs2, 16(r4) +; CHECK-P9-NEXT: lxv vs1, 32(r4) +; CHECK-P9-NEXT: xscvdpsxws f4, f3 +; CHECK-P9-NEXT: lxv vs0, 48(r4) +; CHECK-P9-NEXT: xscvdpsxws f5, f2 +; CHECK-P9-NEXT: xscvdpsxws f6, f1 ; CHECK-P9-NEXT: xxswapd vs3, vs3 -; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: mtfprd f5, r5 -; CHECK-P9-NEXT: mffprwz r5, f6 -; CHECK-P9-NEXT: xxswapd vs2, vs2 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mtfprd f6, r5 -; CHECK-P9-NEXT: mffprwz r5, f7 -; CHECK-P9-NEXT: mtfprd f7, r5 -; CHECK-P9-NEXT: mffprwz r5, f8 -; CHECK-P9-NEXT: mtfprd f8, r5 -; CHECK-P9-NEXT: mffprwz r5, f9 -; CHECK-P9-NEXT: mtfprd f9, r5 -; CHECK-P9-NEXT: mffprwz r5, f4 -; CHECK-P9-NEXT: mtfprd f4, r5 -; CHECK-P9-NEXT: mffprwz r5, f3 -; CHECK-P9-NEXT: xxswapd vs1, vs1 -; 
CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: xxswapd v2, vs5 -; CHECK-P9-NEXT: xxswapd v5, vs8 -; CHECK-P9-NEXT: xxswapd v0, vs9 -; CHECK-P9-NEXT: mtfprd f3, r5 -; CHECK-P9-NEXT: mffprwz r5, f2 -; CHECK-P9-NEXT: mtfprd f2, r5 +; CHECK-P9-NEXT: xscvdpsxws f7, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: xxswapd v1, vs2 +; CHECK-P9-NEXT: mffprwz r5, f4 +; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: xxswapd vs2, vs2 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: mtvsrd v2, r5 +; CHECK-P9-NEXT: mffprwz r5, f5 +; CHECK-P9-NEXT: mtvsrd v3, r5 +; CHECK-P9-NEXT: mffprwz r5, f6 +; CHECK-P9-NEXT: mtvsrd v4, r5 +; CHECK-P9-NEXT: mffprwz r5, f7 +; CHECK-P9-NEXT: mtvsrd v5, r5 +; CHECK-P9-NEXT: mffprwz r5, f3 +; CHECK-P9-NEXT: lxv vs3, 64(r4) +; CHECK-P9-NEXT: xxswapd vs1, vs1 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: mtvsrd v0, r5 +; CHECK-P9-NEXT: mffprwz r5, f2 ; CHECK-P9-NEXT: lxv vs2, 80(r4) -; CHECK-P9-NEXT: xxswapd v3, vs4 -; CHECK-P9-NEXT: vmrglh v2, v2, v3 -; CHECK-P9-NEXT: xxswapd v3, vs6 -; CHECK-P9-NEXT: xxswapd v4, vs3 +; CHECK-P9-NEXT: vmrghh v2, v2, v0 +; CHECK-P9-NEXT: mtvsrd v0, r5 +; CHECK-P9-NEXT: mffprwz r5, f1 +; CHECK-P9-NEXT: lxv vs1, 96(r4) +; CHECK-P9-NEXT: xscvdpsxws f4, f3 +; CHECK-P9-NEXT: xxswapd vs3, vs3 +; CHECK-P9-NEXT: vmrghh v3, v3, v0 +; CHECK-P9-NEXT: mtvsrd v0, r5 +; CHECK-P9-NEXT: mffprwz r5, f0 +; CHECK-P9-NEXT: lxv vs0, 112(r4) +; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: vmrghh v4, v4, v0 +; CHECK-P9-NEXT: mtvsrd v0, r5 +; CHECK-P9-NEXT: vmrglw v2, v3, v2 +; CHECK-P9-NEXT: vmrghh v5, v5, v0 +; CHECK-P9-NEXT: mffprwz r4, f4 +; CHECK-P9-NEXT: vmrglw v4, v5, v4 +; CHECK-P9-NEXT: mtvsrd v3, r4 +; CHECK-P9-NEXT: mffprwz r4, f3 ; CHECK-P9-NEXT: xscvdpsxws f3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs2 -; CHECK-P9-NEXT: mffprwz r5, f1 -; CHECK-P9-NEXT: vmrglh v3, v3, v4 -; CHECK-P9-NEXT: xxswapd v4, vs7 -; CHECK-P9-NEXT: mtfprd f1, r5 -; CHECK-P9-NEXT: mffprwz r5, 
f0 -; CHECK-P9-NEXT: vmrglh v4, v4, v1 -; CHECK-P9-NEXT: xxswapd v1, vs1 -; CHECK-P9-NEXT: mtfprd f0, r5 -; CHECK-P9-NEXT: vmrglh v5, v5, v1 +; CHECK-P9-NEXT: xxmrgld vs4, v4, v2 +; CHECK-P9-NEXT: mtvsrd v2, r4 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: xxswapd v1, vs0 -; CHECK-P9-NEXT: lxv vs0, 112(r4) -; CHECK-P9-NEXT: lxv vs1, 96(r4) +; CHECK-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-P9-NEXT: stxv vs4, 0(r3) ; CHECK-P9-NEXT: mffprwz r4, f3 -; CHECK-P9-NEXT: mtfprd f3, r4 +; CHECK-P9-NEXT: mtvsrd v3, r4 ; CHECK-P9-NEXT: mffprwz r4, f2 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: vmrglw v3, v5, v4 -; CHECK-P9-NEXT: xxmrgld vs4, v3, v2 -; CHECK-P9-NEXT: xxswapd v2, vs3 -; CHECK-P9-NEXT: vmrglh v0, v0, v1 -; CHECK-P9-NEXT: mtfprd f2, r4 -; CHECK-P9-NEXT: xxswapd v3, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 +; CHECK-P9-NEXT: mtvsrd v4, r4 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: vmrghh v3, v3, v4 ; CHECK-P9-NEXT: mffprwz r4, f2 -; CHECK-P9-NEXT: mtfprd f2, r4 +; CHECK-P9-NEXT: vmrglw v2, v3, v2 +; CHECK-P9-NEXT: mtvsrd v3, r4 ; CHECK-P9-NEXT: mffprwz r4, f1 -; CHECK-P9-NEXT: mtfprd f1, r4 -; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 +; CHECK-P9-NEXT: mtvsrd v4, r4 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: vmrghh v3, v3, v4 ; CHECK-P9-NEXT: mffprwz r4, f1 -; CHECK-P9-NEXT: mtfprd f1, r4 +; CHECK-P9-NEXT: mtvsrd v4, r4 ; CHECK-P9-NEXT: mffprwz r4, f0 -; CHECK-P9-NEXT: vmrglh v2, v2, v3 -; CHECK-P9-NEXT: xxswapd v3, vs2 -; CHECK-P9-NEXT: vmrglh v3, v3, v4 -; CHECK-P9-NEXT: xxswapd v4, vs1 -; CHECK-P9-NEXT: vmrglw v2, v2, v0 -; CHECK-P9-NEXT: mtfprd f0, r4 -; CHECK-P9-NEXT: xxswapd v5, vs0 -; CHECK-P9-NEXT: vmrglh v4, v4, v5 +; CHECK-P9-NEXT: mtvsrd v5, r4 +; CHECK-P9-NEXT: vmrghh v4, v4, v5 ; CHECK-P9-NEXT: vmrglw v3, v4, v3 ; CHECK-P9-NEXT: xxmrgld vs0, v3, v2 ; CHECK-P9-NEXT: stxv vs0, 16(r3) -; CHECK-P9-NEXT: stxv vs4, 0(r3) ; CHECK-P9-NEXT: 
blr ; ; CHECK-BE-LABEL: test16elt_signed: diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll index 369fb3f10100..173ced964ad6 100644 --- a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll @@ -16,12 +16,10 @@ define i64 @test2elt(<2 x double> %a) local_unnamed_addr #0 { ; CHECK-P8-NEXT: xscvdpuxws f1, v2 ; CHECK-P8-NEXT: xscvdpuxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f1 +; CHECK-P8-NEXT: mtvsrwz v2, r3 ; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: mtfprd f1, r4 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: xxswapd v3, vs1 -; CHECK-P8-NEXT: vmrglw v2, v2, v3 +; CHECK-P8-NEXT: mtvsrwz v3, r4 +; CHECK-P8-NEXT: vmrghw v2, v2, v3 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr @@ -35,7 +33,7 @@ define i64 @test2elt(<2 x double> %a) local_unnamed_addr #0 { ; CHECK-P9-NEXT: xscvdpuxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 ; CHECK-P9-NEXT: mtvsrws v2, r3 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 +; CHECK-P9-NEXT: vmrghw v2, v3, v2 ; CHECK-P9-NEXT: mfvsrld r3, v2 ; CHECK-P9-NEXT: blr ; @@ -310,12 +308,10 @@ define i64 @test2elt_signed(<2 x double> %a) local_unnamed_addr #0 { ; CHECK-P8-NEXT: xscvdpsxws f1, v2 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f1 +; CHECK-P8-NEXT: mtvsrwz v2, r3 ; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: mtfprd f1, r4 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: xxswapd v3, vs1 -; CHECK-P8-NEXT: vmrglw v2, v2, v3 +; CHECK-P8-NEXT: mtvsrwz v3, r4 +; CHECK-P8-NEXT: vmrghw v2, v2, v3 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr @@ -329,7 +325,7 @@ define i64 @test2elt_signed(<2 x double> %a) local_unnamed_addr #0 { ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 ; CHECK-P9-NEXT: mtvsrws v2, r3 -; CHECK-P9-NEXT: vmrglw 
v2, v3, v2 +; CHECK-P9-NEXT: vmrghw v2, v3, v2 ; CHECK-P9-NEXT: mfvsrld r3, v2 ; CHECK-P9-NEXT: blr ; diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll index fb13d1bd71f5..fd28d9a1afdc 100644 --- a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll @@ -16,12 +16,10 @@ define i16 @test2elt(<2 x double> %a) local_unnamed_addr #0 { ; CHECK-P8-NEXT: xscvdpsxws f1, v2 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f1 +; CHECK-P8-NEXT: mtvsrd v2, r3 ; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: mtfprd f1, r4 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: xxswapd v3, vs1 -; CHECK-P8-NEXT: vmrglb v2, v2, v3 +; CHECK-P8-NEXT: mtvsrd v3, r4 +; CHECK-P8-NEXT: vmrghb v2, v2, v3 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: clrldi r3, r3, 48 @@ -33,15 +31,13 @@ define i16 @test2elt(<2 x double> %a) local_unnamed_addr #0 { ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: xscvdpsxws f0, v2 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 -; CHECK-P9-NEXT: xxswapd v3, vs0 ; CHECK-P9-NEXT: xxswapd vs0, v2 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: addi r3, r1, -2 -; CHECK-P9-NEXT: xxswapd v2, vs0 -; CHECK-P9-NEXT: vmrglb v2, v3, v2 +; CHECK-P9-NEXT: vmrghb v2, v3, v2 ; CHECK-P9-NEXT: vsldoi v2, v2, v2, 8 ; CHECK-P9-NEXT: stxsihx v2, 0, r3 ; CHECK-P9-NEXT: lhz r3, -2(r1) @@ -84,18 +80,14 @@ define i32 @test4elt(<4 x double>* nocapture readonly) local_unnamed_addr #1 { ; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: mffprwz r3, f2 ; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: mtfprd f2, r3 -; CHECK-P8-NEXT: mtfprd f3, r4 +; CHECK-P8-NEXT: mtvsrd v2, r3 +; CHECK-P8-NEXT: mtvsrd v3, r4 ; CHECK-P8-NEXT: 
mffprwz r3, f0 -; CHECK-P8-NEXT: xxswapd v2, vs2 ; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: xxswapd v4, vs3 -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: mtfprd f1, r4 -; CHECK-P8-NEXT: xxswapd v3, vs0 -; CHECK-P8-NEXT: xxswapd v5, vs1 -; CHECK-P8-NEXT: vmrglb v2, v3, v2 -; CHECK-P8-NEXT: vmrglb v3, v5, v4 +; CHECK-P8-NEXT: mtvsrd v4, r3 +; CHECK-P8-NEXT: mtvsrd v5, r4 +; CHECK-P8-NEXT: vmrghb v2, v4, v2 +; CHECK-P8-NEXT: vmrghb v3, v5, v3 ; CHECK-P8-NEXT: vmrglh v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mffprwz r3, f0 @@ -109,24 +101,20 @@ define i32 @test4elt(<4 x double>* nocapture readonly) local_unnamed_addr #1 { ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: lxv vs0, 16(r3) ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: xxswapd v2, vs2 -; CHECK-P9-NEXT: mtfprd f1, r3 -; CHECK-P9-NEXT: xxswapd v3, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: vmrghb v2, v2, v3 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtfprd f1, r3 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 -; CHECK-P9-NEXT: vmrglb v2, v2, v3 -; CHECK-P9-NEXT: xxswapd v3, vs1 -; CHECK-P9-NEXT: xxswapd v4, vs0 -; CHECK-P9-NEXT: vmrglb v3, v3, v4 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: li r3, 0 +; CHECK-P9-NEXT: vmrghb v3, v3, v4 +; CHECK-P9-NEXT: vmrglh v2, v3, v2 ; CHECK-P9-NEXT: vextuwrx r3, r3, v2 ; CHECK-P9-NEXT: blr ; @@ -185,36 +173,28 @@ define i64 @test8elt(<8 x double>* nocapture readonly) local_unnamed_addr #1 { ; CHECK-P8-NEXT: xxswapd vs3, vs3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mffprwz r3, f4 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: mffprwz r3, f4 ; 
CHECK-P8-NEXT: mffprwz r4, f5 -; CHECK-P8-NEXT: mtfprd f4, r3 +; CHECK-P8-NEXT: mtvsrd v2, r3 ; CHECK-P8-NEXT: mffprwz r3, f6 -; CHECK-P8-NEXT: mtfprd f5, r4 -; CHECK-P8-NEXT: xxswapd v2, vs4 +; CHECK-P8-NEXT: mtvsrd v3, r4 ; CHECK-P8-NEXT: mffprwz r4, f7 -; CHECK-P8-NEXT: mtfprd f6, r3 -; CHECK-P8-NEXT: xxswapd v3, vs5 +; CHECK-P8-NEXT: mtvsrd v4, r3 +; CHECK-P8-NEXT: mtvsrd v5, r4 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mtfprd f7, r4 -; CHECK-P8-NEXT: xxswapd v4, vs6 ; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: xxswapd v1, vs7 +; CHECK-P8-NEXT: mtvsrd v0, r3 +; CHECK-P8-NEXT: mtvsrd v1, r4 ; CHECK-P8-NEXT: mffprwz r3, f2 -; CHECK-P8-NEXT: mtfprd f1, r4 -; CHECK-P8-NEXT: xxswapd v5, vs0 ; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: mtfprd f2, r3 -; CHECK-P8-NEXT: xxswapd v0, vs1 -; CHECK-P8-NEXT: mtfprd f0, r4 -; CHECK-P8-NEXT: xxswapd v6, vs2 -; CHECK-P8-NEXT: vmrglb v2, v5, v2 -; CHECK-P8-NEXT: xxswapd v5, vs0 -; CHECK-P8-NEXT: vmrglb v3, v0, v3 -; CHECK-P8-NEXT: vmrglb v4, v6, v4 -; CHECK-P8-NEXT: vmrglb v5, v5, v1 +; CHECK-P8-NEXT: vmrghb v2, v0, v2 +; CHECK-P8-NEXT: vmrghb v3, v1, v3 +; CHECK-P8-NEXT: mtvsrd v0, r3 +; CHECK-P8-NEXT: mtvsrd v1, r4 +; CHECK-P8-NEXT: vmrghb v4, v0, v4 +; CHECK-P8-NEXT: vmrghb v5, v1, v5 ; CHECK-P8-NEXT: vmrglh v2, v3, v2 ; CHECK-P8-NEXT: vmrglh v3, v5, v4 ; CHECK-P8-NEXT: vmrglw v2, v3, v2 @@ -228,47 +208,39 @@ define i64 @test8elt(<8 x double>* nocapture readonly) local_unnamed_addr #1 { ; CHECK-P9-NEXT: xscvdpsxws f4, f3 ; CHECK-P9-NEXT: xxswapd vs3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: lxv vs2, 16(r3) ; CHECK-P9-NEXT: lxv vs0, 48(r3) ; CHECK-P9-NEXT: lxv vs1, 32(r3) -; CHECK-P9-NEXT: lxv vs2, 16(r3) ; CHECK-P9-NEXT: mffprwz r3, f4 -; CHECK-P9-NEXT: mtfprd f4, r3 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: xxswapd v2, vs4 -; CHECK-P9-NEXT: mtfprd f3, r3 -; CHECK-P9-NEXT: xxswapd v3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, 
f2 ; CHECK-P9-NEXT: xxswapd vs2, vs2 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: vmrghb v2, v2, v3 ; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: mtfprd f3, r3 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 -; CHECK-P9-NEXT: xxswapd v4, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: vmrghb v3, v3, v4 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: vmrglb v2, v2, v3 -; CHECK-P9-NEXT: xxswapd v3, vs3 -; CHECK-P9-NEXT: vmrglb v3, v3, v4 ; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: xxswapd v3, vs2 -; CHECK-P9-NEXT: mtfprd f1, r3 -; CHECK-P9-NEXT: xxswapd v4, vs1 +; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: vmrghb v3, v3, v4 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtfprd f1, r3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 -; CHECK-P9-NEXT: vmrglb v3, v3, v4 -; CHECK-P9-NEXT: xxswapd v4, vs1 -; CHECK-P9-NEXT: xxswapd v5, vs0 -; CHECK-P9-NEXT: vmrglb v4, v4, v5 +; CHECK-P9-NEXT: mtvsrd v5, r3 +; CHECK-P9-NEXT: vmrghb v4, v4, v5 ; CHECK-P9-NEXT: vmrglh v3, v4, v3 ; CHECK-P9-NEXT: vmrglw v2, v3, v2 ; CHECK-P9-NEXT: mfvsrld r3, v2 @@ -364,79 +336,63 @@ define <16 x i8> @test16elt(<16 x double>* nocapture readonly) local_unnamed_add ; CHECK-P8-NEXT: xxswapd vs7, vs7 ; CHECK-P8-NEXT: xscvdpsxws v2, f9 ; CHECK-P8-NEXT: xxswapd vs9, vs9 -; CHECK-P8-NEXT: mffprwz r3, f4 ; CHECK-P8-NEXT: xscvdpsxws v3, f11 ; CHECK-P8-NEXT: xxswapd vs11, vs11 +; CHECK-P8-NEXT: mffprwz r3, f4 ; CHECK-P8-NEXT: mffprwz r4, f6 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: mtfprd f4, r3 -; 
CHECK-P8-NEXT: mffprwz r3, f8 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: xxswapd v4, vs4 -; CHECK-P8-NEXT: mtfprd f6, r4 +; CHECK-P8-NEXT: mtvsrd v4, r3 +; CHECK-P8-NEXT: mffprwz r3, f8 +; CHECK-P8-NEXT: mtvsrd v5, r4 ; CHECK-P8-NEXT: mffprwz r4, f10 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: xxswapd v5, vs6 -; CHECK-P8-NEXT: mtfprd f8, r3 -; CHECK-P8-NEXT: mffprwz r3, f12 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: xxswapd v0, vs8 -; CHECK-P8-NEXT: mtfprd f10, r4 +; CHECK-P8-NEXT: mtvsrd v0, r3 +; CHECK-P8-NEXT: mffprwz r3, f12 +; CHECK-P8-NEXT: mtvsrd v1, r4 ; CHECK-P8-NEXT: mffprwz r4, f13 ; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: xxswapd v1, vs10 -; CHECK-P8-NEXT: mtfprd f12, r3 -; CHECK-P8-NEXT: mfvsrwz r3, v2 ; CHECK-P8-NEXT: xscvdpsxws f7, f7 -; CHECK-P8-NEXT: xxswapd v6, vs12 -; CHECK-P8-NEXT: mtfprd f13, r4 +; CHECK-P8-NEXT: mtvsrd v6, r3 +; CHECK-P8-NEXT: mfvsrwz r3, v2 +; CHECK-P8-NEXT: mtvsrd v2, r4 ; CHECK-P8-NEXT: mfvsrwz r4, v3 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: xxswapd v7, vs13 -; CHECK-P8-NEXT: mffprwz r3, f0 ; CHECK-P8-NEXT: xscvdpsxws f9, f9 -; CHECK-P8-NEXT: xxswapd v2, v2 ; CHECK-P8-NEXT: xscvdpsxws f11, f11 -; CHECK-P8-NEXT: mtvsrd v3, r4 +; CHECK-P8-NEXT: mtvsrd v3, r3 +; CHECK-P8-NEXT: mtvsrd v7, r4 +; CHECK-P8-NEXT: mffprwz r3, f0 ; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: xxswapd v3, v3 +; CHECK-P8-NEXT: mtvsrd v8, r3 +; CHECK-P8-NEXT: mtvsrd v9, r4 ; CHECK-P8-NEXT: mffprwz r3, f2 -; CHECK-P8-NEXT: mtfprd f1, r4 -; CHECK-P8-NEXT: xxswapd v8, vs0 ; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: mtfprd f2, r3 -; CHECK-P8-NEXT: xxswapd v9, vs1 +; CHECK-P8-NEXT: vmrghb v4, v8, v4 +; CHECK-P8-NEXT: vmrghb v5, v9, v5 +; CHECK-P8-NEXT: mtvsrd v8, r3 +; CHECK-P8-NEXT: mtvsrd v9, r4 ; CHECK-P8-NEXT: mffprwz r3, f5 -; CHECK-P8-NEXT: mtfprd f3, r4 -; CHECK-P8-NEXT: xxswapd v10, vs2 ; CHECK-P8-NEXT: mffprwz r4, f7 -; CHECK-P8-NEXT: mtfprd f5, r3 +; 
CHECK-P8-NEXT: vmrghb v0, v8, v0 +; CHECK-P8-NEXT: vmrghb v1, v9, v1 +; CHECK-P8-NEXT: mtvsrd v8, r3 +; CHECK-P8-NEXT: mtvsrd v9, r4 ; CHECK-P8-NEXT: mffprwz r3, f9 -; CHECK-P8-NEXT: mtfprd f7, r4 ; CHECK-P8-NEXT: mffprwz r4, f11 -; CHECK-P8-NEXT: vmrglb v4, v8, v4 -; CHECK-P8-NEXT: xxswapd v8, vs3 -; CHECK-P8-NEXT: vmrglb v5, v9, v5 -; CHECK-P8-NEXT: xxswapd v9, vs5 -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: mtfprd f1, r4 -; CHECK-P8-NEXT: vmrglb v0, v10, v0 -; CHECK-P8-NEXT: xxswapd v10, vs7 -; CHECK-P8-NEXT: vmrglb v1, v8, v1 -; CHECK-P8-NEXT: xxswapd v8, vs0 -; CHECK-P8-NEXT: vmrglb v6, v9, v6 -; CHECK-P8-NEXT: xxswapd v9, vs1 -; CHECK-P8-NEXT: vmrglb v7, v10, v7 -; CHECK-P8-NEXT: vmrglb v2, v8, v2 -; CHECK-P8-NEXT: vmrglb v3, v9, v3 +; CHECK-P8-NEXT: vmrghb v6, v8, v6 +; CHECK-P8-NEXT: vmrghb v2, v9, v2 +; CHECK-P8-NEXT: mtvsrd v8, r3 +; CHECK-P8-NEXT: mtvsrd v9, r4 +; CHECK-P8-NEXT: vmrghb v3, v8, v3 +; CHECK-P8-NEXT: vmrghb v7, v9, v7 ; CHECK-P8-NEXT: vmrglh v4, v5, v4 ; CHECK-P8-NEXT: vmrglh v5, v1, v0 -; CHECK-P8-NEXT: vmrglh v0, v7, v6 -; CHECK-P8-NEXT: vmrglh v2, v3, v2 -; CHECK-P8-NEXT: vmrglw v3, v5, v4 -; CHECK-P8-NEXT: vmrglw v2, v2, v0 -; CHECK-P8-NEXT: xxmrgld v2, v2, v3 +; CHECK-P8-NEXT: vmrglh v2, v2, v6 +; CHECK-P8-NEXT: vmrglh v3, v7, v3 +; CHECK-P8-NEXT: vmrglw v4, v5, v4 +; CHECK-P8-NEXT: vmrglw v2, v3, v2 +; CHECK-P8-NEXT: xxmrgld v2, v2, v4 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test16elt: @@ -445,94 +401,78 @@ define <16 x i8> @test16elt(<16 x double>* nocapture readonly) local_unnamed_add ; CHECK-P9-NEXT: xscvdpsxws f8, f7 ; CHECK-P9-NEXT: xxswapd vs7, vs7 ; CHECK-P9-NEXT: xscvdpsxws f7, f7 +; CHECK-P9-NEXT: lxv vs6, 16(r3) ; CHECK-P9-NEXT: lxv vs0, 112(r3) ; CHECK-P9-NEXT: lxv vs1, 96(r3) ; CHECK-P9-NEXT: lxv vs2, 80(r3) ; CHECK-P9-NEXT: lxv vs3, 64(r3) ; CHECK-P9-NEXT: lxv vs4, 48(r3) ; CHECK-P9-NEXT: lxv vs5, 32(r3) -; CHECK-P9-NEXT: lxv vs6, 16(r3) ; CHECK-P9-NEXT: mffprwz r3, f8 -; CHECK-P9-NEXT: mtfprd f8, r3 +; 
CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mffprwz r3, f7 -; CHECK-P9-NEXT: xxswapd v2, vs8 -; CHECK-P9-NEXT: mtfprd f7, r3 -; CHECK-P9-NEXT: xxswapd v3, vs7 ; CHECK-P9-NEXT: xscvdpsxws f7, f6 ; CHECK-P9-NEXT: xxswapd vs6, vs6 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvdpsxws f6, f6 +; CHECK-P9-NEXT: vmrghb v2, v2, v3 ; CHECK-P9-NEXT: mffprwz r3, f7 -; CHECK-P9-NEXT: mtfprd f7, r3 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: mffprwz r3, f6 -; CHECK-P9-NEXT: mtfprd f6, r3 -; CHECK-P9-NEXT: xxswapd v4, vs6 ; CHECK-P9-NEXT: xscvdpsxws f6, f5 ; CHECK-P9-NEXT: xxswapd vs5, vs5 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvdpsxws f5, f5 +; CHECK-P9-NEXT: vmrghb v3, v3, v4 ; CHECK-P9-NEXT: mffprwz r3, f6 -; CHECK-P9-NEXT: mtfprd f6, r3 -; CHECK-P9-NEXT: mffprwz r3, f5 -; CHECK-P9-NEXT: vmrglb v2, v2, v3 -; CHECK-P9-NEXT: xxswapd v3, vs7 -; CHECK-P9-NEXT: vmrglb v3, v3, v4 ; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: xxswapd v3, vs6 -; CHECK-P9-NEXT: mtfprd f5, r3 -; CHECK-P9-NEXT: xxswapd v4, vs5 +; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mffprwz r3, f5 ; CHECK-P9-NEXT: xscvdpsxws f5, f4 ; CHECK-P9-NEXT: xxswapd vs4, vs4 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: vmrghb v3, v3, v4 ; CHECK-P9-NEXT: mffprwz r3, f5 -; CHECK-P9-NEXT: mtfprd f5, r3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: mffprwz r3, f4 -; CHECK-P9-NEXT: mtfprd f4, r3 -; CHECK-P9-NEXT: xxswapd v5, vs4 ; CHECK-P9-NEXT: xscvdpsxws f4, f3 ; CHECK-P9-NEXT: xxswapd vs3, vs3 +; CHECK-P9-NEXT: mtvsrd v5, r3 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: vmrglb v3, v3, v4 -; CHECK-P9-NEXT: xxswapd v4, vs5 -; CHECK-P9-NEXT: vmrglb v4, v4, v5 +; CHECK-P9-NEXT: vmrghb v4, v4, v5 ; CHECK-P9-NEXT: vmrglh v3, v4, v3 ; CHECK-P9-NEXT: mffprwz r3, f4 -; CHECK-P9-NEXT: mtfprd f4, r3 +; CHECK-P9-NEXT: vmrglw v2, v3, v2 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: mtfprd f3, r3 -; CHECK-P9-NEXT: 
xxswapd v4, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs2 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: vmrghb v3, v3, v4 ; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: mtfprd f3, r3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 -; CHECK-P9-NEXT: xxswapd v5, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 +; CHECK-P9-NEXT: mtvsrd v5, r3 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: xxswapd v3, vs4 -; CHECK-P9-NEXT: vmrglb v3, v3, v4 -; CHECK-P9-NEXT: xxswapd v4, vs3 -; CHECK-P9-NEXT: vmrglb v4, v4, v5 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 +; CHECK-P9-NEXT: vmrghb v4, v4, v5 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 +; CHECK-P9-NEXT: vmrglh v3, v4, v3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: xxswapd v4, vs2 -; CHECK-P9-NEXT: mtfprd f1, r3 -; CHECK-P9-NEXT: xxswapd v5, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 +; CHECK-P9-NEXT: mtvsrd v5, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: vmrghb v4, v4, v5 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtfprd f1, r3 +; CHECK-P9-NEXT: mtvsrd v5, r3 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 -; CHECK-P9-NEXT: vmrglb v4, v4, v5 -; CHECK-P9-NEXT: xxswapd v5, vs1 -; CHECK-P9-NEXT: xxswapd v0, vs0 -; CHECK-P9-NEXT: vmrglb v5, v5, v0 +; CHECK-P9-NEXT: mtvsrd v0, r3 +; CHECK-P9-NEXT: vmrghb v5, v5, v0 ; CHECK-P9-NEXT: vmrglh v4, v5, v4 ; CHECK-P9-NEXT: vmrglw v3, v4, v3 ; CHECK-P9-NEXT: xxmrgld v2, v3, v2 @@ -649,12 +589,10 @@ define i16 @test2elt_signed(<2 x double> %a) local_unnamed_addr #0 { ; CHECK-P8-NEXT: xscvdpsxws f1, v2 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f1 +; CHECK-P8-NEXT: mtvsrd v2, r3 ; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: mtfprd f1, r4 -; 
CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: xxswapd v3, vs1 -; CHECK-P8-NEXT: vmrglb v2, v2, v3 +; CHECK-P8-NEXT: mtvsrd v3, r4 +; CHECK-P8-NEXT: vmrghb v2, v2, v3 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: clrldi r3, r3, 48 @@ -666,15 +604,13 @@ define i16 @test2elt_signed(<2 x double> %a) local_unnamed_addr #0 { ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: xscvdpsxws f0, v2 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 -; CHECK-P9-NEXT: xxswapd v3, vs0 ; CHECK-P9-NEXT: xxswapd vs0, v2 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: addi r3, r1, -2 -; CHECK-P9-NEXT: xxswapd v2, vs0 -; CHECK-P9-NEXT: vmrglb v2, v3, v2 +; CHECK-P9-NEXT: vmrghb v2, v3, v2 ; CHECK-P9-NEXT: vsldoi v2, v2, v2, 8 ; CHECK-P9-NEXT: stxsihx v2, 0, r3 ; CHECK-P9-NEXT: lhz r3, -2(r1) @@ -717,18 +653,14 @@ define i32 @test4elt_signed(<4 x double>* nocapture readonly) local_unnamed_addr ; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: mffprwz r3, f2 ; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: mtfprd f2, r3 -; CHECK-P8-NEXT: mtfprd f3, r4 +; CHECK-P8-NEXT: mtvsrd v2, r3 +; CHECK-P8-NEXT: mtvsrd v3, r4 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: xxswapd v2, vs2 ; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: xxswapd v4, vs3 -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: mtfprd f1, r4 -; CHECK-P8-NEXT: xxswapd v3, vs0 -; CHECK-P8-NEXT: xxswapd v5, vs1 -; CHECK-P8-NEXT: vmrglb v2, v3, v2 -; CHECK-P8-NEXT: vmrglb v3, v5, v4 +; CHECK-P8-NEXT: mtvsrd v4, r3 +; CHECK-P8-NEXT: mtvsrd v5, r4 +; CHECK-P8-NEXT: vmrghb v2, v4, v2 +; CHECK-P8-NEXT: vmrghb v3, v5, v3 ; CHECK-P8-NEXT: vmrglh v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mffprwz r3, f0 @@ -742,24 +674,20 @@ define i32 @test4elt_signed(<4 x double>* nocapture readonly) local_unnamed_addr ; CHECK-P9-NEXT: xscvdpsxws f1, 
f1 ; CHECK-P9-NEXT: lxv vs0, 16(r3) ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: xxswapd v2, vs2 -; CHECK-P9-NEXT: mtfprd f1, r3 -; CHECK-P9-NEXT: xxswapd v3, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: vmrghb v2, v2, v3 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtfprd f1, r3 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 -; CHECK-P9-NEXT: vmrglb v2, v2, v3 -; CHECK-P9-NEXT: xxswapd v3, vs1 -; CHECK-P9-NEXT: xxswapd v4, vs0 -; CHECK-P9-NEXT: vmrglb v3, v3, v4 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: li r3, 0 +; CHECK-P9-NEXT: vmrghb v3, v3, v4 +; CHECK-P9-NEXT: vmrglh v2, v3, v2 ; CHECK-P9-NEXT: vextuwrx r3, r3, v2 ; CHECK-P9-NEXT: blr ; @@ -818,36 +746,28 @@ define i64 @test8elt_signed(<8 x double>* nocapture readonly) local_unnamed_addr ; CHECK-P8-NEXT: xxswapd vs3, vs3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mffprwz r3, f4 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: mffprwz r3, f4 ; CHECK-P8-NEXT: mffprwz r4, f5 -; CHECK-P8-NEXT: mtfprd f4, r3 +; CHECK-P8-NEXT: mtvsrd v2, r3 ; CHECK-P8-NEXT: mffprwz r3, f6 -; CHECK-P8-NEXT: mtfprd f5, r4 -; CHECK-P8-NEXT: xxswapd v2, vs4 +; CHECK-P8-NEXT: mtvsrd v3, r4 ; CHECK-P8-NEXT: mffprwz r4, f7 -; CHECK-P8-NEXT: mtfprd f6, r3 -; CHECK-P8-NEXT: xxswapd v3, vs5 +; CHECK-P8-NEXT: mtvsrd v4, r3 +; CHECK-P8-NEXT: mtvsrd v5, r4 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mtfprd f7, r4 -; CHECK-P8-NEXT: xxswapd v4, vs6 ; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: xxswapd v1, vs7 +; CHECK-P8-NEXT: mtvsrd v0, r3 +; CHECK-P8-NEXT: mtvsrd v1, r4 ; CHECK-P8-NEXT: mffprwz r3, f2 -; 
CHECK-P8-NEXT: mtfprd f1, r4 -; CHECK-P8-NEXT: xxswapd v5, vs0 ; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: mtfprd f2, r3 -; CHECK-P8-NEXT: xxswapd v0, vs1 -; CHECK-P8-NEXT: mtfprd f0, r4 -; CHECK-P8-NEXT: xxswapd v6, vs2 -; CHECK-P8-NEXT: vmrglb v2, v5, v2 -; CHECK-P8-NEXT: xxswapd v5, vs0 -; CHECK-P8-NEXT: vmrglb v3, v0, v3 -; CHECK-P8-NEXT: vmrglb v4, v6, v4 -; CHECK-P8-NEXT: vmrglb v5, v5, v1 +; CHECK-P8-NEXT: vmrghb v2, v0, v2 +; CHECK-P8-NEXT: vmrghb v3, v1, v3 +; CHECK-P8-NEXT: mtvsrd v0, r3 +; CHECK-P8-NEXT: mtvsrd v1, r4 +; CHECK-P8-NEXT: vmrghb v4, v0, v4 +; CHECK-P8-NEXT: vmrghb v5, v1, v5 ; CHECK-P8-NEXT: vmrglh v2, v3, v2 ; CHECK-P8-NEXT: vmrglh v3, v5, v4 ; CHECK-P8-NEXT: vmrglw v2, v3, v2 @@ -861,47 +781,39 @@ define i64 @test8elt_signed(<8 x double>* nocapture readonly) local_unnamed_addr ; CHECK-P9-NEXT: xscvdpsxws f4, f3 ; CHECK-P9-NEXT: xxswapd vs3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: lxv vs2, 16(r3) ; CHECK-P9-NEXT: lxv vs0, 48(r3) ; CHECK-P9-NEXT: lxv vs1, 32(r3) -; CHECK-P9-NEXT: lxv vs2, 16(r3) ; CHECK-P9-NEXT: mffprwz r3, f4 -; CHECK-P9-NEXT: mtfprd f4, r3 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: xxswapd v2, vs4 -; CHECK-P9-NEXT: mtfprd f3, r3 -; CHECK-P9-NEXT: xxswapd v3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs2 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: vmrghb v2, v2, v3 ; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: mtfprd f3, r3 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 -; CHECK-P9-NEXT: xxswapd v4, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: vmrghb v3, v3, v4 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: vmrglb v2, v2, v3 -; CHECK-P9-NEXT: xxswapd v3, vs3 -; 
CHECK-P9-NEXT: vmrglb v3, v3, v4 ; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: xxswapd v3, vs2 -; CHECK-P9-NEXT: mtfprd f1, r3 -; CHECK-P9-NEXT: xxswapd v4, vs1 +; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: vmrghb v3, v3, v4 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtfprd f1, r3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 -; CHECK-P9-NEXT: vmrglb v3, v3, v4 -; CHECK-P9-NEXT: xxswapd v4, vs1 -; CHECK-P9-NEXT: xxswapd v5, vs0 -; CHECK-P9-NEXT: vmrglb v4, v4, v5 +; CHECK-P9-NEXT: mtvsrd v5, r3 +; CHECK-P9-NEXT: vmrghb v4, v4, v5 ; CHECK-P9-NEXT: vmrglh v3, v4, v3 ; CHECK-P9-NEXT: vmrglw v2, v3, v2 ; CHECK-P9-NEXT: mfvsrld r3, v2 @@ -997,79 +909,63 @@ define <16 x i8> @test16elt_signed(<16 x double>* nocapture readonly) local_unna ; CHECK-P8-NEXT: xxswapd vs7, vs7 ; CHECK-P8-NEXT: xscvdpsxws v2, f9 ; CHECK-P8-NEXT: xxswapd vs9, vs9 -; CHECK-P8-NEXT: mffprwz r3, f4 ; CHECK-P8-NEXT: xscvdpsxws v3, f11 ; CHECK-P8-NEXT: xxswapd vs11, vs11 +; CHECK-P8-NEXT: mffprwz r3, f4 ; CHECK-P8-NEXT: mffprwz r4, f6 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: mtfprd f4, r3 -; CHECK-P8-NEXT: mffprwz r3, f8 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: xxswapd v4, vs4 -; CHECK-P8-NEXT: mtfprd f6, r4 +; CHECK-P8-NEXT: mtvsrd v4, r3 +; CHECK-P8-NEXT: mffprwz r3, f8 +; CHECK-P8-NEXT: mtvsrd v5, r4 ; CHECK-P8-NEXT: mffprwz r4, f10 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: xxswapd v5, vs6 -; CHECK-P8-NEXT: mtfprd f8, r3 -; CHECK-P8-NEXT: mffprwz r3, f12 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: xxswapd v0, vs8 -; CHECK-P8-NEXT: mtfprd f10, r4 +; CHECK-P8-NEXT: mtvsrd v0, r3 +; CHECK-P8-NEXT: mffprwz r3, f12 +; CHECK-P8-NEXT: mtvsrd v1, r4 ; CHECK-P8-NEXT: mffprwz r4, f13 ; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; 
CHECK-P8-NEXT: xxswapd v1, vs10 -; CHECK-P8-NEXT: mtfprd f12, r3 -; CHECK-P8-NEXT: mfvsrwz r3, v2 ; CHECK-P8-NEXT: xscvdpsxws f7, f7 -; CHECK-P8-NEXT: xxswapd v6, vs12 -; CHECK-P8-NEXT: mtfprd f13, r4 +; CHECK-P8-NEXT: mtvsrd v6, r3 +; CHECK-P8-NEXT: mfvsrwz r3, v2 +; CHECK-P8-NEXT: mtvsrd v2, r4 ; CHECK-P8-NEXT: mfvsrwz r4, v3 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: xxswapd v7, vs13 -; CHECK-P8-NEXT: mffprwz r3, f0 ; CHECK-P8-NEXT: xscvdpsxws f9, f9 -; CHECK-P8-NEXT: xxswapd v2, v2 ; CHECK-P8-NEXT: xscvdpsxws f11, f11 -; CHECK-P8-NEXT: mtvsrd v3, r4 +; CHECK-P8-NEXT: mtvsrd v3, r3 +; CHECK-P8-NEXT: mtvsrd v7, r4 +; CHECK-P8-NEXT: mffprwz r3, f0 ; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: xxswapd v3, v3 +; CHECK-P8-NEXT: mtvsrd v8, r3 +; CHECK-P8-NEXT: mtvsrd v9, r4 ; CHECK-P8-NEXT: mffprwz r3, f2 -; CHECK-P8-NEXT: mtfprd f1, r4 -; CHECK-P8-NEXT: xxswapd v8, vs0 ; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: mtfprd f2, r3 -; CHECK-P8-NEXT: xxswapd v9, vs1 +; CHECK-P8-NEXT: vmrghb v4, v8, v4 +; CHECK-P8-NEXT: vmrghb v5, v9, v5 +; CHECK-P8-NEXT: mtvsrd v8, r3 +; CHECK-P8-NEXT: mtvsrd v9, r4 ; CHECK-P8-NEXT: mffprwz r3, f5 -; CHECK-P8-NEXT: mtfprd f3, r4 -; CHECK-P8-NEXT: xxswapd v10, vs2 ; CHECK-P8-NEXT: mffprwz r4, f7 -; CHECK-P8-NEXT: mtfprd f5, r3 +; CHECK-P8-NEXT: vmrghb v0, v8, v0 +; CHECK-P8-NEXT: vmrghb v1, v9, v1 +; CHECK-P8-NEXT: mtvsrd v8, r3 +; CHECK-P8-NEXT: mtvsrd v9, r4 ; CHECK-P8-NEXT: mffprwz r3, f9 -; CHECK-P8-NEXT: mtfprd f7, r4 ; CHECK-P8-NEXT: mffprwz r4, f11 -; CHECK-P8-NEXT: vmrglb v4, v8, v4 -; CHECK-P8-NEXT: xxswapd v8, vs3 -; CHECK-P8-NEXT: vmrglb v5, v9, v5 -; CHECK-P8-NEXT: xxswapd v9, vs5 -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: mtfprd f1, r4 -; CHECK-P8-NEXT: vmrglb v0, v10, v0 -; CHECK-P8-NEXT: xxswapd v10, vs7 -; CHECK-P8-NEXT: vmrglb v1, v8, v1 -; CHECK-P8-NEXT: xxswapd v8, vs0 -; CHECK-P8-NEXT: vmrglb v6, v9, v6 -; CHECK-P8-NEXT: xxswapd v9, vs1 -; CHECK-P8-NEXT: vmrglb v7, 
v10, v7 -; CHECK-P8-NEXT: vmrglb v2, v8, v2 -; CHECK-P8-NEXT: vmrglb v3, v9, v3 +; CHECK-P8-NEXT: vmrghb v6, v8, v6 +; CHECK-P8-NEXT: vmrghb v2, v9, v2 +; CHECK-P8-NEXT: mtvsrd v8, r3 +; CHECK-P8-NEXT: mtvsrd v9, r4 +; CHECK-P8-NEXT: vmrghb v3, v8, v3 +; CHECK-P8-NEXT: vmrghb v7, v9, v7 ; CHECK-P8-NEXT: vmrglh v4, v5, v4 ; CHECK-P8-NEXT: vmrglh v5, v1, v0 -; CHECK-P8-NEXT: vmrglh v0, v7, v6 -; CHECK-P8-NEXT: vmrglh v2, v3, v2 -; CHECK-P8-NEXT: vmrglw v3, v5, v4 -; CHECK-P8-NEXT: vmrglw v2, v2, v0 -; CHECK-P8-NEXT: xxmrgld v2, v2, v3 +; CHECK-P8-NEXT: vmrglh v2, v2, v6 +; CHECK-P8-NEXT: vmrglh v3, v7, v3 +; CHECK-P8-NEXT: vmrglw v4, v5, v4 +; CHECK-P8-NEXT: vmrglw v2, v3, v2 +; CHECK-P8-NEXT: xxmrgld v2, v2, v4 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test16elt_signed: @@ -1078,94 +974,78 @@ define <16 x i8> @test16elt_signed(<16 x double>* nocapture readonly) local_unna ; CHECK-P9-NEXT: xscvdpsxws f8, f7 ; CHECK-P9-NEXT: xxswapd vs7, vs7 ; CHECK-P9-NEXT: xscvdpsxws f7, f7 +; CHECK-P9-NEXT: lxv vs6, 16(r3) ; CHECK-P9-NEXT: lxv vs0, 112(r3) ; CHECK-P9-NEXT: lxv vs1, 96(r3) ; CHECK-P9-NEXT: lxv vs2, 80(r3) ; CHECK-P9-NEXT: lxv vs3, 64(r3) ; CHECK-P9-NEXT: lxv vs4, 48(r3) ; CHECK-P9-NEXT: lxv vs5, 32(r3) -; CHECK-P9-NEXT: lxv vs6, 16(r3) ; CHECK-P9-NEXT: mffprwz r3, f8 -; CHECK-P9-NEXT: mtfprd f8, r3 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mffprwz r3, f7 -; CHECK-P9-NEXT: xxswapd v2, vs8 -; CHECK-P9-NEXT: mtfprd f7, r3 -; CHECK-P9-NEXT: xxswapd v3, vs7 ; CHECK-P9-NEXT: xscvdpsxws f7, f6 ; CHECK-P9-NEXT: xxswapd vs6, vs6 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvdpsxws f6, f6 +; CHECK-P9-NEXT: vmrghb v2, v2, v3 ; CHECK-P9-NEXT: mffprwz r3, f7 -; CHECK-P9-NEXT: mtfprd f7, r3 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: mffprwz r3, f6 -; CHECK-P9-NEXT: mtfprd f6, r3 -; CHECK-P9-NEXT: xxswapd v4, vs6 ; CHECK-P9-NEXT: xscvdpsxws f6, f5 ; CHECK-P9-NEXT: xxswapd vs5, vs5 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvdpsxws f5, f5 +; 
CHECK-P9-NEXT: vmrghb v3, v3, v4 ; CHECK-P9-NEXT: mffprwz r3, f6 -; CHECK-P9-NEXT: mtfprd f6, r3 -; CHECK-P9-NEXT: mffprwz r3, f5 -; CHECK-P9-NEXT: vmrglb v2, v2, v3 -; CHECK-P9-NEXT: xxswapd v3, vs7 -; CHECK-P9-NEXT: vmrglb v3, v3, v4 ; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: xxswapd v3, vs6 -; CHECK-P9-NEXT: mtfprd f5, r3 -; CHECK-P9-NEXT: xxswapd v4, vs5 +; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mffprwz r3, f5 ; CHECK-P9-NEXT: xscvdpsxws f5, f4 ; CHECK-P9-NEXT: xxswapd vs4, vs4 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: vmrghb v3, v3, v4 ; CHECK-P9-NEXT: mffprwz r3, f5 -; CHECK-P9-NEXT: mtfprd f5, r3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: mffprwz r3, f4 -; CHECK-P9-NEXT: mtfprd f4, r3 -; CHECK-P9-NEXT: xxswapd v5, vs4 ; CHECK-P9-NEXT: xscvdpsxws f4, f3 ; CHECK-P9-NEXT: xxswapd vs3, vs3 +; CHECK-P9-NEXT: mtvsrd v5, r3 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: vmrglb v3, v3, v4 -; CHECK-P9-NEXT: xxswapd v4, vs5 -; CHECK-P9-NEXT: vmrglb v4, v4, v5 +; CHECK-P9-NEXT: vmrghb v4, v4, v5 ; CHECK-P9-NEXT: vmrglh v3, v4, v3 ; CHECK-P9-NEXT: mffprwz r3, f4 -; CHECK-P9-NEXT: mtfprd f4, r3 +; CHECK-P9-NEXT: vmrglw v2, v3, v2 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: mtfprd f3, r3 -; CHECK-P9-NEXT: xxswapd v4, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs2 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: vmrghb v3, v3, v4 ; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: mtfprd f3, r3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 -; CHECK-P9-NEXT: xxswapd v5, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 +; CHECK-P9-NEXT: mtvsrd v5, r3 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: xxswapd v3, vs4 -; CHECK-P9-NEXT: vmrglb v3, v3, v4 -; CHECK-P9-NEXT: xxswapd v4, vs3 -; 
CHECK-P9-NEXT: vmrglb v4, v4, v5 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 +; CHECK-P9-NEXT: vmrghb v4, v4, v5 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtfprd f2, r3 +; CHECK-P9-NEXT: vmrglh v3, v4, v3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: xxswapd v4, vs2 -; CHECK-P9-NEXT: mtfprd f1, r3 -; CHECK-P9-NEXT: xxswapd v5, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 +; CHECK-P9-NEXT: mtvsrd v5, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: vmrghb v4, v4, v5 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtfprd f1, r3 +; CHECK-P9-NEXT: mtvsrd v5, r3 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtfprd f0, r3 -; CHECK-P9-NEXT: vmrglb v4, v4, v5 -; CHECK-P9-NEXT: xxswapd v5, vs1 -; CHECK-P9-NEXT: xxswapd v0, vs0 -; CHECK-P9-NEXT: vmrglb v5, v5, v0 +; CHECK-P9-NEXT: mtvsrd v0, r3 +; CHECK-P9-NEXT: vmrghb v5, v5, v0 ; CHECK-P9-NEXT: vmrglh v4, v5, v4 ; CHECK-P9-NEXT: vmrglw v3, v4, v3 ; CHECK-P9-NEXT: xxmrgld v2, v3, v2 diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll index e51af62cb128..5ecd34941b39 100644 --- a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll @@ -24,9 +24,9 @@ define i64 @test2elt(i32 %a.coerce) local_unnamed_addr #0 { ; CHECK-P8-NEXT: xscvuxdsp f1, f1 ; CHECK-P8-NEXT: xscvdpspn vs0, f0 ; CHECK-P8-NEXT: xscvdpspn vs1, f1 -; CHECK-P8-NEXT: xxsldwi v2, vs0, vs0, 1 -; CHECK-P8-NEXT: xxsldwi v3, vs1, vs1, 1 -; CHECK-P8-NEXT: vmrglw v2, v3, v2 +; CHECK-P8-NEXT: xxsldwi v2, vs0, vs0, 3 +; CHECK-P8-NEXT: xxsldwi v3, vs1, vs1, 3 +; CHECK-P8-NEXT: vmrghw v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr @@ -43,12 +43,12 @@ define i64 @test2elt(i32 %a.coerce) local_unnamed_addr #0 { ; CHECK-P9-NEXT: xscvdpspn vs0, f0 ; CHECK-P9-NEXT: vextuhrx r3, r3, v2 ; CHECK-P9-NEXT: clrlwi r3, r3, 16 -; 
CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 1 +; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 3 ; CHECK-P9-NEXT: mtfprwz f0, r3 ; CHECK-P9-NEXT: xscvuxdsp f0, f0 ; CHECK-P9-NEXT: xscvdpspn vs0, f0 -; CHECK-P9-NEXT: xxsldwi v2, vs0, vs0, 1 -; CHECK-P9-NEXT: vmrglw v2, v2, v3 +; CHECK-P9-NEXT: xxsldwi v2, vs0, vs0, 3 +; CHECK-P9-NEXT: vmrghw v2, v2, v3 ; CHECK-P9-NEXT: mfvsrld r3, v2 ; CHECK-P9-NEXT: blr ; @@ -80,25 +80,17 @@ entry: define <4 x float> @test4elt(i64 %a.coerce) local_unnamed_addr #1 { ; CHECK-P8-LABEL: test4elt: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: addis r4, r2, .LCPI1_0@toc@ha -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: addi r3, r4, .LCPI1_0@toc@l -; CHECK-P8-NEXT: xxlxor v4, v4, v4 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: lvx v3, 0, r3 -; CHECK-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-P8-NEXT: xxlxor v2, v2, v2 +; CHECK-P8-NEXT: mtvsrd v3, r3 +; CHECK-P8-NEXT: vmrghh v2, v2, v3 ; CHECK-P8-NEXT: xvcvuxwsp v2, v2 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test4elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mtfprd f0, r3 -; CHECK-P9-NEXT: addis r3, r2, .LCPI1_0@toc@ha -; CHECK-P9-NEXT: addi r3, r3, .LCPI1_0@toc@l -; CHECK-P9-NEXT: lxvx v3, 0, r3 -; CHECK-P9-NEXT: xxswapd v2, vs0 -; CHECK-P9-NEXT: xxlxor v4, v4, v4 -; CHECK-P9-NEXT: vperm v2, v4, v2, v3 +; CHECK-P9-NEXT: mtvsrd v2, r3 +; CHECK-P9-NEXT: xxlxor v3, v3, v3 +; CHECK-P9-NEXT: vmrghh v2, v3, v2 ; CHECK-P9-NEXT: xvcvuxwsp v2, v2 ; CHECK-P9-NEXT: blr ; @@ -121,17 +113,11 @@ entry: define void @test8elt(<8 x float>* noalias nocapture sret %agg.result, <8 x i16> %a) local_unnamed_addr #2 { ; CHECK-P8-LABEL: test8elt: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: addis r4, r2, .LCPI2_0@toc@ha -; CHECK-P8-NEXT: addis r5, r2, .LCPI2_1@toc@ha -; CHECK-P8-NEXT: xxlxor v4, v4, v4 -; CHECK-P8-NEXT: addi r4, r4, .LCPI2_0@toc@l -; CHECK-P8-NEXT: lvx v3, 0, r4 -; CHECK-P8-NEXT: addi r4, r5, .LCPI2_1@toc@l -; CHECK-P8-NEXT: lvx v5, 0, r4 +; CHECK-P8-NEXT: xxlxor v3, v3, v3 ; CHECK-P8-NEXT: 
li r4, 16 -; CHECK-P8-NEXT: vperm v3, v4, v2, v3 -; CHECK-P8-NEXT: vperm v2, v4, v2, v5 -; CHECK-P8-NEXT: xvcvuxwsp v3, v3 +; CHECK-P8-NEXT: vmrglh v4, v3, v2 +; CHECK-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-P8-NEXT: xvcvuxwsp v3, v4 ; CHECK-P8-NEXT: xvcvuxwsp v2, v2 ; CHECK-P8-NEXT: stvx v3, 0, r3 ; CHECK-P8-NEXT: stvx v2, r3, r4 @@ -139,19 +125,13 @@ define void @test8elt(<8 x float>* noalias nocapture sret %agg.result, <8 x i16> ; ; CHECK-P9-LABEL: test8elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: addis r4, r2, .LCPI2_0@toc@ha -; CHECK-P9-NEXT: addi r4, r4, .LCPI2_0@toc@l -; CHECK-P9-NEXT: lxvx v3, 0, r4 -; CHECK-P9-NEXT: xxlxor v4, v4, v4 -; CHECK-P9-NEXT: addis r4, r2, .LCPI2_1@toc@ha -; CHECK-P9-NEXT: addi r4, r4, .LCPI2_1@toc@l -; CHECK-P9-NEXT: vperm v3, v4, v2, v3 -; CHECK-P9-NEXT: xvcvuxwsp vs0, v3 -; CHECK-P9-NEXT: lxvx v3, 0, r4 -; CHECK-P9-NEXT: vperm v2, v4, v2, v3 -; CHECK-P9-NEXT: stxv vs0, 0(r3) +; CHECK-P9-NEXT: xxlxor v3, v3, v3 +; CHECK-P9-NEXT: vmrglh v4, v3, v2 +; CHECK-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-P9-NEXT: xvcvuxwsp vs0, v4 ; CHECK-P9-NEXT: xvcvuxwsp vs1, v2 ; CHECK-P9-NEXT: stxv vs1, 16(r3) +; CHECK-P9-NEXT: stxv vs0, 0(r3) ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test8elt: @@ -276,9 +256,9 @@ define i64 @test2elt_signed(i32 %a.coerce) local_unnamed_addr #0 { ; CHECK-P8-NEXT: xscvsxdsp f1, f1 ; CHECK-P8-NEXT: xscvdpspn vs0, f0 ; CHECK-P8-NEXT: xscvdpspn vs1, f1 -; CHECK-P8-NEXT: xxsldwi v2, vs0, vs0, 1 -; CHECK-P8-NEXT: xxsldwi v3, vs1, vs1, 1 -; CHECK-P8-NEXT: vmrglw v2, v3, v2 +; CHECK-P8-NEXT: xxsldwi v2, vs0, vs0, 3 +; CHECK-P8-NEXT: xxsldwi v3, vs1, vs1, 3 +; CHECK-P8-NEXT: vmrghw v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr @@ -295,12 +275,12 @@ define i64 @test2elt_signed(i32 %a.coerce) local_unnamed_addr #0 { ; CHECK-P9-NEXT: xscvdpspn vs0, f0 ; CHECK-P9-NEXT: vextuhrx r3, r3, v2 ; CHECK-P9-NEXT: extsh r3, r3 -; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 1 +; CHECK-P9-NEXT: 
xxsldwi v3, vs0, vs0, 3 ; CHECK-P9-NEXT: mtfprwa f0, r3 ; CHECK-P9-NEXT: xscvsxdsp f0, f0 ; CHECK-P9-NEXT: xscvdpspn vs0, f0 -; CHECK-P9-NEXT: xxsldwi v2, vs0, vs0, 1 -; CHECK-P9-NEXT: vmrglw v2, v2, v3 +; CHECK-P9-NEXT: xxsldwi v2, vs0, vs0, 3 +; CHECK-P9-NEXT: vmrghw v2, v2, v3 ; CHECK-P9-NEXT: mfvsrld r3, v2 ; CHECK-P9-NEXT: blr ; @@ -332,11 +312,10 @@ entry: define <4 x float> @test4elt_signed(i64 %a.coerce) local_unnamed_addr #1 { ; CHECK-P8-LABEL: test4elt_signed: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: mtfprd f0, r3 +; CHECK-P8-NEXT: mtvsrd v2, r3 ; CHECK-P8-NEXT: vspltisw v3, 8 -; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: vmrghh v2, v2, v2 ; CHECK-P8-NEXT: vadduwm v3, v3, v3 -; CHECK-P8-NEXT: vmrglh v2, v2, v2 ; CHECK-P8-NEXT: vslw v2, v2, v3 ; CHECK-P8-NEXT: vsraw v2, v2, v3 ; CHECK-P8-NEXT: xvcvsxwsp v2, v2 @@ -344,9 +323,8 @@ define <4 x float> @test4elt_signed(i64 %a.coerce) local_unnamed_addr #1 { ; ; CHECK-P9-LABEL: test4elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mtfprd f0, r3 -; CHECK-P9-NEXT: xxswapd v2, vs0 -; CHECK-P9-NEXT: vmrglh v2, v2, v2 +; CHECK-P9-NEXT: mtvsrd v2, r3 +; CHECK-P9-NEXT: vmrghh v2, v2, v2 ; CHECK-P9-NEXT: vextsh2w v2, v2 ; CHECK-P9-NEXT: xvcvsxwsp v2, v2 ; CHECK-P9-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll index faec95831816..ea8ede3af22a 100644 --- a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll @@ -13,11 +13,10 @@ define <2 x double> @test2elt(i32 %a.coerce) local_unnamed_addr #0 { ; CHECK-P8-LABEL: test2elt: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r4, r2, .LCPI0_0@toc@ha -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: addi r3, r4, .LCPI0_0@toc@l +; CHECK-P8-NEXT: mtvsrwz v2, r3 +; CHECK-P8-NEXT: addi r4, r4, .LCPI0_0@toc@l ; CHECK-P8-NEXT: xxlxor v4, v4, v4 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: lvx v3, 0, 
r3 +; CHECK-P8-NEXT: lvx v3, 0, r4 ; CHECK-P8-NEXT: vperm v2, v4, v2, v3 ; CHECK-P8-NEXT: xvcvuxddp v2, v2 ; CHECK-P8-NEXT: blr @@ -53,19 +52,18 @@ define void @test4elt(<4 x double>* noalias nocapture sret %agg.result, i64 %a.c ; CHECK-P8-LABEL: test4elt: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r5, r2, .LCPI1_0@toc@ha -; CHECK-P8-NEXT: mtfprd f0, r4 -; CHECK-P8-NEXT: addis r4, r2, .LCPI1_1@toc@ha +; CHECK-P8-NEXT: addis r6, r2, .LCPI1_1@toc@ha +; CHECK-P8-NEXT: mtvsrd v2, r4 ; CHECK-P8-NEXT: addi r5, r5, .LCPI1_0@toc@l -; CHECK-P8-NEXT: addi r4, r4, .LCPI1_1@toc@l +; CHECK-P8-NEXT: addi r4, r6, .LCPI1_1@toc@l ; CHECK-P8-NEXT: xxlxor v4, v4, v4 -; CHECK-P8-NEXT: lvx v2, 0, r5 -; CHECK-P8-NEXT: xxswapd v3, vs0 +; CHECK-P8-NEXT: lvx v3, 0, r5 ; CHECK-P8-NEXT: lvx v5, 0, r4 ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: vperm v2, v4, v3, v2 -; CHECK-P8-NEXT: vperm v3, v4, v3, v5 -; CHECK-P8-NEXT: xvcvuxddp vs0, v2 -; CHECK-P8-NEXT: xvcvuxddp vs1, v3 +; CHECK-P8-NEXT: vperm v3, v4, v2, v3 +; CHECK-P8-NEXT: vperm v2, v4, v2, v5 +; CHECK-P8-NEXT: xvcvuxddp vs0, v3 +; CHECK-P8-NEXT: xvcvuxddp vs1, v2 ; CHECK-P8-NEXT: xxswapd vs0, vs0 ; CHECK-P8-NEXT: xxswapd vs1, vs1 ; CHECK-P8-NEXT: stxvd2x vs1, r3, r4 @@ -74,11 +72,10 @@ define void @test4elt(<4 x double>* noalias nocapture sret %agg.result, i64 %a.c ; ; CHECK-P9-LABEL: test4elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mtfprd f0, r4 +; CHECK-P9-NEXT: mtvsrd v2, r4 ; CHECK-P9-NEXT: addis r4, r2, .LCPI1_0@toc@ha ; CHECK-P9-NEXT: addi r4, r4, .LCPI1_0@toc@l ; CHECK-P9-NEXT: lxvx v3, 0, r4 -; CHECK-P9-NEXT: xxswapd v2, vs0 ; CHECK-P9-NEXT: xxlxor v4, v4, v4 ; CHECK-P9-NEXT: addis r4, r2, .LCPI1_1@toc@ha ; CHECK-P9-NEXT: addi r4, r4, .LCPI1_1@toc@l @@ -370,14 +367,13 @@ define <2 x double> @test2elt_signed(i32 %a.coerce) local_unnamed_addr #0 { ; CHECK-P8-LABEL: test2elt_signed: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r4, r2, .LCPI4_0@toc@ha -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: 
addi r3, r4, .LCPI4_0@toc@l -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: lvx v3, 0, r3 +; CHECK-P8-NEXT: mtvsrwz v3, r3 ; CHECK-P8-NEXT: addis r3, r2, .LCPI4_1@toc@ha +; CHECK-P8-NEXT: addi r4, r4, .LCPI4_0@toc@l ; CHECK-P8-NEXT: addi r3, r3, .LCPI4_1@toc@l +; CHECK-P8-NEXT: lvx v2, 0, r4 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 -; CHECK-P8-NEXT: vperm v2, v2, v2, v3 +; CHECK-P8-NEXT: vperm v2, v3, v3, v2 ; CHECK-P8-NEXT: xxswapd v3, vs0 ; CHECK-P8-NEXT: vsld v2, v2, v3 ; CHECK-P8-NEXT: vsrad v2, v2, v3 @@ -415,17 +411,16 @@ define void @test4elt_signed(<4 x double>* noalias nocapture sret %agg.result, i ; CHECK-P8-LABEL: test4elt_signed: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r5, r2, .LCPI5_0@toc@ha -; CHECK-P8-NEXT: mtfprd f0, r4 -; CHECK-P8-NEXT: addis r4, r2, .LCPI5_2@toc@ha -; CHECK-P8-NEXT: addi r5, r5, .LCPI5_0@toc@l -; CHECK-P8-NEXT: addi r4, r4, .LCPI5_2@toc@l -; CHECK-P8-NEXT: lvx v2, 0, r5 -; CHECK-P8-NEXT: xxswapd v3, vs0 -; CHECK-P8-NEXT: lvx v4, 0, r4 +; CHECK-P8-NEXT: addis r6, r2, .LCPI5_2@toc@ha +; CHECK-P8-NEXT: mtvsrd v3, r4 ; CHECK-P8-NEXT: addis r4, r2, .LCPI5_1@toc@ha +; CHECK-P8-NEXT: addi r5, r5, .LCPI5_0@toc@l ; CHECK-P8-NEXT: addi r4, r4, .LCPI5_1@toc@l +; CHECK-P8-NEXT: lvx v2, 0, r5 +; CHECK-P8-NEXT: addi r5, r6, .LCPI5_2@toc@l ; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 ; CHECK-P8-NEXT: li r4, 16 +; CHECK-P8-NEXT: lvx v4, 0, r5 ; CHECK-P8-NEXT: vperm v2, v3, v3, v2 ; CHECK-P8-NEXT: vperm v3, v3, v3, v4 ; CHECK-P8-NEXT: xxswapd v4, vs0 @@ -443,14 +438,13 @@ define void @test4elt_signed(<4 x double>* noalias nocapture sret %agg.result, i ; ; CHECK-P9-LABEL: test4elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mtfprd f0, r4 +; CHECK-P9-NEXT: mtvsrd v2, r4 ; CHECK-P9-NEXT: addis r4, r2, .LCPI5_0@toc@ha ; CHECK-P9-NEXT: addi r4, r4, .LCPI5_0@toc@l ; CHECK-P9-NEXT: lxvx v3, 0, r4 -; CHECK-P9-NEXT: xxswapd v2, vs0 -; CHECK-P9-NEXT: vperm v3, v2, v2, v3 ; CHECK-P9-NEXT: addis r4, r2, .LCPI5_1@toc@ha ; CHECK-P9-NEXT: addi r4, 
r4, .LCPI5_1@toc@l +; CHECK-P9-NEXT: vperm v3, v2, v2, v3 ; CHECK-P9-NEXT: vextsh2d v3, v3 ; CHECK-P9-NEXT: xvcvsxddp vs0, v3 ; CHECK-P9-NEXT: lxvx v3, 0, r4 diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll index 6f046f69ecca..f152c2b008ff 100644 --- a/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll @@ -18,9 +18,9 @@ define i64 @test2elt(<2 x i64> %a) local_unnamed_addr #0 { ; CHECK-P8-NEXT: xscvuxdsp f0, f0 ; CHECK-P8-NEXT: xscvdpspn vs1, f1 ; CHECK-P8-NEXT: xscvdpspn vs0, f0 -; CHECK-P8-NEXT: xxsldwi v3, vs1, vs1, 1 -; CHECK-P8-NEXT: xxsldwi v2, vs0, vs0, 1 -; CHECK-P8-NEXT: vmrglw v2, v3, v2 +; CHECK-P8-NEXT: xxsldwi v3, vs1, vs1, 3 +; CHECK-P8-NEXT: xxsldwi v2, vs0, vs0, 3 +; CHECK-P8-NEXT: vmrghw v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr @@ -30,12 +30,12 @@ define i64 @test2elt(<2 x i64> %a) local_unnamed_addr #0 { ; CHECK-P9-NEXT: xxswapd vs0, v2 ; CHECK-P9-NEXT: xscvuxdsp f0, f0 ; CHECK-P9-NEXT: xscvdpspn vs0, f0 -; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 1 +; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 3 ; CHECK-P9-NEXT: xxlor vs0, v2, v2 ; CHECK-P9-NEXT: xscvuxdsp f0, f0 ; CHECK-P9-NEXT: xscvdpspn vs0, f0 -; CHECK-P9-NEXT: xxsldwi v2, vs0, vs0, 1 -; CHECK-P9-NEXT: vmrglw v2, v2, v3 +; CHECK-P9-NEXT: xxsldwi v2, vs0, vs0, 3 +; CHECK-P9-NEXT: vmrghw v2, v2, v3 ; CHECK-P9-NEXT: mfvsrld r3, v2 ; CHECK-P9-NEXT: blr ; @@ -311,9 +311,9 @@ define i64 @test2elt_signed(<2 x i64> %a) local_unnamed_addr #0 { ; CHECK-P8-NEXT: xscvsxdsp f0, f0 ; CHECK-P8-NEXT: xscvdpspn vs1, f1 ; CHECK-P8-NEXT: xscvdpspn vs0, f0 -; CHECK-P8-NEXT: xxsldwi v3, vs1, vs1, 1 -; CHECK-P8-NEXT: xxsldwi v2, vs0, vs0, 1 -; CHECK-P8-NEXT: vmrglw v2, v3, v2 +; CHECK-P8-NEXT: xxsldwi v3, vs1, vs1, 3 +; CHECK-P8-NEXT: xxsldwi v2, vs0, vs0, 3 +; CHECK-P8-NEXT: vmrghw v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; 
CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr @@ -323,12 +323,12 @@ define i64 @test2elt_signed(<2 x i64> %a) local_unnamed_addr #0 { ; CHECK-P9-NEXT: xxswapd vs0, v2 ; CHECK-P9-NEXT: xscvsxdsp f0, f0 ; CHECK-P9-NEXT: xscvdpspn vs0, f0 -; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 1 +; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 3 ; CHECK-P9-NEXT: xxlor vs0, v2, v2 ; CHECK-P9-NEXT: xscvsxdsp f0, f0 ; CHECK-P9-NEXT: xscvdpspn vs0, f0 -; CHECK-P9-NEXT: xxsldwi v2, vs0, vs0, 1 -; CHECK-P9-NEXT: vmrglw v2, v2, v3 +; CHECK-P9-NEXT: xxsldwi v2, vs0, vs0, 3 +; CHECK-P9-NEXT: vmrghw v2, v2, v3 ; CHECK-P9-NEXT: mfvsrld r3, v2 ; CHECK-P9-NEXT: blr ; diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll index ce97ed67baa1..f2cb9f5f45fb 100644 --- a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll @@ -24,9 +24,9 @@ define i64 @test2elt(i16 %a.coerce) local_unnamed_addr #0 { ; CHECK-P8-NEXT: xscvuxdsp f1, f1 ; CHECK-P8-NEXT: xscvdpspn vs0, f0 ; CHECK-P8-NEXT: xscvdpspn vs1, f1 -; CHECK-P8-NEXT: xxsldwi v2, vs0, vs0, 1 -; CHECK-P8-NEXT: xxsldwi v3, vs1, vs1, 1 -; CHECK-P8-NEXT: vmrglw v2, v3, v2 +; CHECK-P8-NEXT: xxsldwi v2, vs0, vs0, 3 +; CHECK-P8-NEXT: xxsldwi v3, vs1, vs1, 3 +; CHECK-P8-NEXT: vmrghw v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr @@ -43,12 +43,12 @@ define i64 @test2elt(i16 %a.coerce) local_unnamed_addr #0 { ; CHECK-P9-NEXT: xscvdpspn vs0, f0 ; CHECK-P9-NEXT: vextubrx r3, r3, v2 ; CHECK-P9-NEXT: clrlwi r3, r3, 24 -; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 1 +; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 3 ; CHECK-P9-NEXT: mtfprwz f0, r3 ; CHECK-P9-NEXT: xscvuxdsp f0, f0 ; CHECK-P9-NEXT: xscvdpspn vs0, f0 -; CHECK-P9-NEXT: xxsldwi v2, vs0, vs0, 1 -; CHECK-P9-NEXT: vmrglw v2, v2, v3 +; CHECK-P9-NEXT: xxsldwi v2, vs0, vs0, 3 +; CHECK-P9-NEXT: vmrghw v2, v2, v3 ; CHECK-P9-NEXT: mfvsrld r3, v2 ; 
CHECK-P9-NEXT: blr ; @@ -81,11 +81,10 @@ define <4 x float> @test4elt(i32 %a.coerce) local_unnamed_addr #1 { ; CHECK-P8-LABEL: test4elt: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r4, r2, .LCPI1_0@toc@ha -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: addi r3, r4, .LCPI1_0@toc@l +; CHECK-P8-NEXT: mtvsrwz v2, r3 +; CHECK-P8-NEXT: addi r4, r4, .LCPI1_0@toc@l ; CHECK-P8-NEXT: xxlxor v4, v4, v4 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: lvx v3, 0, r3 +; CHECK-P8-NEXT: lvx v3, 0, r4 ; CHECK-P8-NEXT: vperm v2, v4, v2, v3 ; CHECK-P8-NEXT: xvcvuxwsp v2, v2 ; CHECK-P8-NEXT: blr @@ -121,30 +120,28 @@ define void @test8elt(<8 x float>* noalias nocapture sret %agg.result, i64 %a.co ; CHECK-P8-LABEL: test8elt: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r5, r2, .LCPI2_0@toc@ha -; CHECK-P8-NEXT: mtfprd f0, r4 -; CHECK-P8-NEXT: addis r4, r2, .LCPI2_1@toc@ha +; CHECK-P8-NEXT: addis r6, r2, .LCPI2_1@toc@ha +; CHECK-P8-NEXT: mtvsrd v2, r4 ; CHECK-P8-NEXT: addi r5, r5, .LCPI2_0@toc@l -; CHECK-P8-NEXT: addi r4, r4, .LCPI2_1@toc@l +; CHECK-P8-NEXT: addi r4, r6, .LCPI2_1@toc@l ; CHECK-P8-NEXT: xxlxor v4, v4, v4 -; CHECK-P8-NEXT: lvx v2, 0, r5 -; CHECK-P8-NEXT: xxswapd v3, vs0 +; CHECK-P8-NEXT: lvx v3, 0, r5 ; CHECK-P8-NEXT: lvx v5, 0, r4 ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: vperm v2, v4, v3, v2 -; CHECK-P8-NEXT: vperm v3, v4, v3, v5 -; CHECK-P8-NEXT: xvcvuxwsp v2, v2 +; CHECK-P8-NEXT: vperm v3, v4, v2, v3 +; CHECK-P8-NEXT: vperm v2, v4, v2, v5 ; CHECK-P8-NEXT: xvcvuxwsp v3, v3 -; CHECK-P8-NEXT: stvx v2, 0, r3 -; CHECK-P8-NEXT: stvx v3, r3, r4 +; CHECK-P8-NEXT: xvcvuxwsp v2, v2 +; CHECK-P8-NEXT: stvx v3, 0, r3 +; CHECK-P8-NEXT: stvx v2, r3, r4 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test8elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mtfprd f0, r4 +; CHECK-P9-NEXT: mtvsrd v2, r4 ; CHECK-P9-NEXT: addis r4, r2, .LCPI2_0@toc@ha ; CHECK-P9-NEXT: addi r4, r4, .LCPI2_0@toc@l ; CHECK-P9-NEXT: lxvx v3, 0, r4 -; CHECK-P9-NEXT: xxswapd v2, vs0 ; 
CHECK-P9-NEXT: xxlxor v4, v4, v4 ; CHECK-P9-NEXT: addis r4, r2, .LCPI2_1@toc@ha ; CHECK-P9-NEXT: addi r4, r4, .LCPI2_1@toc@l @@ -292,9 +289,9 @@ define i64 @test2elt_signed(i16 %a.coerce) local_unnamed_addr #0 { ; CHECK-P8-NEXT: xscvsxdsp f1, f1 ; CHECK-P8-NEXT: xscvdpspn vs0, f0 ; CHECK-P8-NEXT: xscvdpspn vs1, f1 -; CHECK-P8-NEXT: xxsldwi v2, vs0, vs0, 1 -; CHECK-P8-NEXT: xxsldwi v3, vs1, vs1, 1 -; CHECK-P8-NEXT: vmrglw v2, v3, v2 +; CHECK-P8-NEXT: xxsldwi v2, vs0, vs0, 3 +; CHECK-P8-NEXT: xxsldwi v3, vs1, vs1, 3 +; CHECK-P8-NEXT: vmrghw v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr @@ -311,12 +308,12 @@ define i64 @test2elt_signed(i16 %a.coerce) local_unnamed_addr #0 { ; CHECK-P9-NEXT: xscvdpspn vs0, f0 ; CHECK-P9-NEXT: vextubrx r3, r3, v2 ; CHECK-P9-NEXT: extsb r3, r3 -; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 1 +; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 3 ; CHECK-P9-NEXT: mtfprwa f0, r3 ; CHECK-P9-NEXT: xscvsxdsp f0, f0 ; CHECK-P9-NEXT: xscvdpspn vs0, f0 -; CHECK-P9-NEXT: xxsldwi v2, vs0, vs0, 1 -; CHECK-P9-NEXT: vmrglw v2, v2, v3 +; CHECK-P9-NEXT: xxsldwi v2, vs0, vs0, 3 +; CHECK-P9-NEXT: vmrghw v2, v2, v3 ; CHECK-P9-NEXT: mfvsrld r3, v2 ; CHECK-P9-NEXT: blr ; @@ -349,11 +346,10 @@ define <4 x float> @test4elt_signed(i32 %a.coerce) local_unnamed_addr #1 { ; CHECK-P8-LABEL: test4elt_signed: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r4, r2, .LCPI5_0@toc@ha -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: addi r3, r4, .LCPI5_0@toc@l -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: lvx v3, 0, r3 -; CHECK-P8-NEXT: vperm v2, v2, v2, v3 +; CHECK-P8-NEXT: mtvsrwz v3, r3 +; CHECK-P8-NEXT: addi r4, r4, .LCPI5_0@toc@l +; CHECK-P8-NEXT: lvx v2, 0, r4 +; CHECK-P8-NEXT: vperm v2, v3, v3, v2 ; CHECK-P8-NEXT: vspltisw v3, 12 ; CHECK-P8-NEXT: vadduwm v3, v3, v3 ; CHECK-P8-NEXT: vslw v2, v2, v3 @@ -392,15 +388,14 @@ define void @test8elt_signed(<8 x float>* noalias nocapture sret %agg.result, i6 ; CHECK-P8-LABEL: 
test8elt_signed: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r5, r2, .LCPI6_0@toc@ha -; CHECK-P8-NEXT: mtfprd f0, r4 -; CHECK-P8-NEXT: addis r4, r2, .LCPI6_1@toc@ha +; CHECK-P8-NEXT: addis r6, r2, .LCPI6_1@toc@ha +; CHECK-P8-NEXT: mtvsrd v3, r4 ; CHECK-P8-NEXT: vspltisw v5, 12 -; CHECK-P8-NEXT: addi r5, r5, .LCPI6_0@toc@l -; CHECK-P8-NEXT: addi r4, r4, .LCPI6_1@toc@l -; CHECK-P8-NEXT: lvx v2, 0, r5 -; CHECK-P8-NEXT: xxswapd v3, vs0 -; CHECK-P8-NEXT: lvx v4, 0, r4 ; CHECK-P8-NEXT: li r4, 16 +; CHECK-P8-NEXT: addi r5, r5, .LCPI6_0@toc@l +; CHECK-P8-NEXT: lvx v2, 0, r5 +; CHECK-P8-NEXT: addi r5, r6, .LCPI6_1@toc@l +; CHECK-P8-NEXT: lvx v4, 0, r5 ; CHECK-P8-NEXT: vperm v2, v3, v3, v2 ; CHECK-P8-NEXT: vperm v3, v3, v3, v4 ; CHECK-P8-NEXT: vadduwm v4, v5, v5 @@ -416,14 +411,13 @@ define void @test8elt_signed(<8 x float>* noalias nocapture sret %agg.result, i6 ; ; CHECK-P9-LABEL: test8elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mtfprd f0, r4 +; CHECK-P9-NEXT: mtvsrd v2, r4 ; CHECK-P9-NEXT: addis r4, r2, .LCPI6_0@toc@ha ; CHECK-P9-NEXT: addi r4, r4, .LCPI6_0@toc@l ; CHECK-P9-NEXT: lxvx v3, 0, r4 -; CHECK-P9-NEXT: xxswapd v2, vs0 -; CHECK-P9-NEXT: vperm v3, v2, v2, v3 ; CHECK-P9-NEXT: addis r4, r2, .LCPI6_1@toc@ha ; CHECK-P9-NEXT: addi r4, r4, .LCPI6_1@toc@l +; CHECK-P9-NEXT: vperm v3, v2, v2, v3 ; CHECK-P9-NEXT: vextsb2w v3, v3 ; CHECK-P9-NEXT: xvcvsxwsp vs0, v3 ; CHECK-P9-NEXT: lxvx v3, 0, r4 diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll index b4582e844f30..268fc9b7d4cc 100644 --- a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll @@ -13,11 +13,10 @@ define <2 x double> @test2elt(i16 %a.coerce) local_unnamed_addr #0 { ; CHECK-P8-LABEL: test2elt: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r4, r2, .LCPI0_0@toc@ha -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: addi r3, r4, .LCPI0_0@toc@l +; 
CHECK-P8-NEXT: mtvsrwz v2, r3 +; CHECK-P8-NEXT: addi r4, r4, .LCPI0_0@toc@l ; CHECK-P8-NEXT: xxlxor v4, v4, v4 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: lvx v3, 0, r3 +; CHECK-P8-NEXT: lvx v3, 0, r4 ; CHECK-P8-NEXT: vperm v2, v4, v2, v3 ; CHECK-P8-NEXT: xvcvuxddp v2, v2 ; CHECK-P8-NEXT: blr @@ -53,19 +52,18 @@ define void @test4elt(<4 x double>* noalias nocapture sret %agg.result, i32 %a.c ; CHECK-P8-LABEL: test4elt: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r5, r2, .LCPI1_0@toc@ha -; CHECK-P8-NEXT: mtfprd f0, r4 -; CHECK-P8-NEXT: addis r4, r2, .LCPI1_1@toc@ha +; CHECK-P8-NEXT: addis r6, r2, .LCPI1_1@toc@ha +; CHECK-P8-NEXT: mtvsrwz v2, r4 ; CHECK-P8-NEXT: addi r5, r5, .LCPI1_0@toc@l -; CHECK-P8-NEXT: addi r4, r4, .LCPI1_1@toc@l +; CHECK-P8-NEXT: addi r4, r6, .LCPI1_1@toc@l ; CHECK-P8-NEXT: xxlxor v4, v4, v4 -; CHECK-P8-NEXT: lvx v2, 0, r5 -; CHECK-P8-NEXT: xxswapd v3, vs0 +; CHECK-P8-NEXT: lvx v3, 0, r5 ; CHECK-P8-NEXT: lvx v5, 0, r4 ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: vperm v2, v4, v3, v2 -; CHECK-P8-NEXT: vperm v3, v4, v3, v5 -; CHECK-P8-NEXT: xvcvuxddp vs0, v2 -; CHECK-P8-NEXT: xvcvuxddp vs1, v3 +; CHECK-P8-NEXT: vperm v3, v4, v2, v3 +; CHECK-P8-NEXT: vperm v2, v4, v2, v5 +; CHECK-P8-NEXT: xvcvuxddp vs0, v3 +; CHECK-P8-NEXT: xvcvuxddp vs1, v2 ; CHECK-P8-NEXT: xxswapd vs0, vs0 ; CHECK-P8-NEXT: xxswapd vs1, vs1 ; CHECK-P8-NEXT: stxvd2x vs1, r3, r4 @@ -118,33 +116,32 @@ define void @test8elt(<8 x double>* noalias nocapture sret %agg.result, i64 %a.c ; CHECK-P8-LABEL: test8elt: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r5, r2, .LCPI2_0@toc@ha -; CHECK-P8-NEXT: mtfprd f0, r4 -; CHECK-P8-NEXT: addis r4, r2, .LCPI2_2@toc@ha +; CHECK-P8-NEXT: addis r6, r2, .LCPI2_2@toc@ha +; CHECK-P8-NEXT: mtvsrd v2, r4 +; CHECK-P8-NEXT: addis r4, r2, .LCPI2_3@toc@ha ; CHECK-P8-NEXT: addi r5, r5, .LCPI2_0@toc@l -; CHECK-P8-NEXT: addi r4, r4, .LCPI2_2@toc@l +; CHECK-P8-NEXT: addi r4, r4, .LCPI2_3@toc@l ; CHECK-P8-NEXT: xxlxor v4, v4, v4 -; 
CHECK-P8-NEXT: lvx v2, 0, r5 -; CHECK-P8-NEXT: addis r5, r2, .LCPI2_3@toc@ha -; CHECK-P8-NEXT: lvx v5, 0, r4 -; CHECK-P8-NEXT: addis r4, r2, .LCPI2_1@toc@ha -; CHECK-P8-NEXT: xxswapd v3, vs0 -; CHECK-P8-NEXT: addi r5, r5, .LCPI2_3@toc@l -; CHECK-P8-NEXT: addi r4, r4, .LCPI2_1@toc@l -; CHECK-P8-NEXT: lvx v0, 0, r5 -; CHECK-P8-NEXT: lvx v1, 0, r4 +; CHECK-P8-NEXT: lvx v3, 0, r5 +; CHECK-P8-NEXT: addi r5, r6, .LCPI2_2@toc@l +; CHECK-P8-NEXT: lvx v0, 0, r4 ; CHECK-P8-NEXT: li r4, 48 +; CHECK-P8-NEXT: lvx v5, 0, r5 +; CHECK-P8-NEXT: addis r5, r2, .LCPI2_1@toc@ha +; CHECK-P8-NEXT: addi r5, r5, .LCPI2_1@toc@l +; CHECK-P8-NEXT: lvx v1, 0, r5 +; CHECK-P8-NEXT: vperm v0, v4, v2, v0 ; CHECK-P8-NEXT: li r5, 32 -; CHECK-P8-NEXT: vperm v2, v4, v3, v2 -; CHECK-P8-NEXT: vperm v5, v4, v3, v5 -; CHECK-P8-NEXT: vperm v0, v4, v3, v0 -; CHECK-P8-NEXT: vperm v3, v4, v3, v1 -; CHECK-P8-NEXT: xvcvuxddp vs0, v2 -; CHECK-P8-NEXT: xvcvuxddp vs1, v5 +; CHECK-P8-NEXT: vperm v3, v4, v2, v3 +; CHECK-P8-NEXT: vperm v5, v4, v2, v5 +; CHECK-P8-NEXT: vperm v2, v4, v2, v1 ; CHECK-P8-NEXT: xvcvuxddp vs2, v0 -; CHECK-P8-NEXT: xvcvuxddp vs3, v3 +; CHECK-P8-NEXT: xvcvuxddp vs0, v3 +; CHECK-P8-NEXT: xvcvuxddp vs1, v5 +; CHECK-P8-NEXT: xvcvuxddp vs3, v2 +; CHECK-P8-NEXT: xxswapd vs2, vs2 ; CHECK-P8-NEXT: xxswapd vs0, vs0 ; CHECK-P8-NEXT: xxswapd vs1, vs1 -; CHECK-P8-NEXT: xxswapd vs2, vs2 ; CHECK-P8-NEXT: xxswapd vs3, vs3 ; CHECK-P8-NEXT: stxvd2x vs2, r3, r4 ; CHECK-P8-NEXT: li r4, 16 @@ -155,11 +152,10 @@ define void @test8elt(<8 x double>* noalias nocapture sret %agg.result, i64 %a.c ; ; CHECK-P9-LABEL: test8elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mtfprd f0, r4 +; CHECK-P9-NEXT: mtvsrd v2, r4 ; CHECK-P9-NEXT: addis r4, r2, .LCPI2_0@toc@ha ; CHECK-P9-NEXT: addi r4, r4, .LCPI2_0@toc@l ; CHECK-P9-NEXT: lxvx v3, 0, r4 -; CHECK-P9-NEXT: xxswapd v2, vs0 ; CHECK-P9-NEXT: xxlxor v4, v4, v4 ; CHECK-P9-NEXT: addis r4, r2, .LCPI2_1@toc@ha ; CHECK-P9-NEXT: addi r4, r4, .LCPI2_1@toc@l @@ -404,14 +400,13 
@@ define <2 x double> @test2elt_signed(i16 %a.coerce) local_unnamed_addr #0 { ; CHECK-P8-LABEL: test2elt_signed: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r4, r2, .LCPI4_0@toc@ha -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: addi r3, r4, .LCPI4_0@toc@l -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: lvx v3, 0, r3 +; CHECK-P8-NEXT: mtvsrwz v3, r3 ; CHECK-P8-NEXT: addis r3, r2, .LCPI4_1@toc@ha +; CHECK-P8-NEXT: addi r4, r4, .LCPI4_0@toc@l ; CHECK-P8-NEXT: addi r3, r3, .LCPI4_1@toc@l +; CHECK-P8-NEXT: lvx v2, 0, r4 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 -; CHECK-P8-NEXT: vperm v2, v2, v2, v3 +; CHECK-P8-NEXT: vperm v2, v3, v3, v2 ; CHECK-P8-NEXT: xxswapd v3, vs0 ; CHECK-P8-NEXT: vsld v2, v2, v3 ; CHECK-P8-NEXT: vsrad v2, v2, v3 @@ -449,17 +444,16 @@ define void @test4elt_signed(<4 x double>* noalias nocapture sret %agg.result, i ; CHECK-P8-LABEL: test4elt_signed: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r5, r2, .LCPI5_0@toc@ha -; CHECK-P8-NEXT: mtfprd f0, r4 -; CHECK-P8-NEXT: addis r4, r2, .LCPI5_2@toc@ha -; CHECK-P8-NEXT: addi r5, r5, .LCPI5_0@toc@l -; CHECK-P8-NEXT: addi r4, r4, .LCPI5_2@toc@l -; CHECK-P8-NEXT: lvx v2, 0, r5 -; CHECK-P8-NEXT: xxswapd v3, vs0 -; CHECK-P8-NEXT: lvx v4, 0, r4 +; CHECK-P8-NEXT: addis r6, r2, .LCPI5_2@toc@ha +; CHECK-P8-NEXT: mtvsrwz v3, r4 ; CHECK-P8-NEXT: addis r4, r2, .LCPI5_1@toc@ha +; CHECK-P8-NEXT: addi r5, r5, .LCPI5_0@toc@l ; CHECK-P8-NEXT: addi r4, r4, .LCPI5_1@toc@l +; CHECK-P8-NEXT: lvx v2, 0, r5 +; CHECK-P8-NEXT: addi r5, r6, .LCPI5_2@toc@l ; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 ; CHECK-P8-NEXT: li r4, 16 +; CHECK-P8-NEXT: lvx v4, 0, r5 ; CHECK-P8-NEXT: vperm v2, v3, v3, v2 ; CHECK-P8-NEXT: vperm v3, v3, v3, v4 ; CHECK-P8-NEXT: xxswapd v4, vs0 @@ -523,26 +517,25 @@ entry: define void @test8elt_signed(<8 x double>* noalias nocapture sret %agg.result, i64 %a.coerce) local_unnamed_addr #1 { ; CHECK-P8-LABEL: test8elt_signed: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: mtfprd f0, r4 -; CHECK-P8-NEXT: 
addis r4, r2, .LCPI6_2@toc@ha ; CHECK-P8-NEXT: addis r5, r2, .LCPI6_0@toc@ha -; CHECK-P8-NEXT: addis r6, r2, .LCPI6_3@toc@ha -; CHECK-P8-NEXT: addi r4, r4, .LCPI6_2@toc@l +; CHECK-P8-NEXT: addis r6, r2, .LCPI6_2@toc@ha +; CHECK-P8-NEXT: mtvsrd v3, r4 +; CHECK-P8-NEXT: addis r4, r2, .LCPI6_1@toc@ha ; CHECK-P8-NEXT: addi r5, r5, .LCPI6_0@toc@l -; CHECK-P8-NEXT: addi r6, r6, .LCPI6_3@toc@l -; CHECK-P8-NEXT: lvx v4, 0, r4 -; CHECK-P8-NEXT: addis r4, r2, .LCPI6_4@toc@ha +; CHECK-P8-NEXT: addi r6, r6, .LCPI6_2@toc@l +; CHECK-P8-NEXT: addi r4, r4, .LCPI6_1@toc@l ; CHECK-P8-NEXT: lvx v2, 0, r5 -; CHECK-P8-NEXT: xxswapd v3, vs0 -; CHECK-P8-NEXT: lvx v5, 0, r6 -; CHECK-P8-NEXT: addis r5, r2, .LCPI6_1@toc@ha -; CHECK-P8-NEXT: addi r4, r4, .LCPI6_4@toc@l -; CHECK-P8-NEXT: addi r5, r5, .LCPI6_1@toc@l -; CHECK-P8-NEXT: lvx v0, 0, r4 -; CHECK-P8-NEXT: lxvd2x vs0, 0, r5 +; CHECK-P8-NEXT: addis r5, r2, .LCPI6_3@toc@ha +; CHECK-P8-NEXT: lvx v4, 0, r6 +; CHECK-P8-NEXT: addis r6, r2, .LCPI6_4@toc@ha +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 ; CHECK-P8-NEXT: li r4, 48 -; CHECK-P8-NEXT: li r5, 32 +; CHECK-P8-NEXT: addi r5, r5, .LCPI6_3@toc@l +; CHECK-P8-NEXT: lvx v5, 0, r5 +; CHECK-P8-NEXT: addi r5, r6, .LCPI6_4@toc@l +; CHECK-P8-NEXT: lvx v0, 0, r5 ; CHECK-P8-NEXT: vperm v2, v3, v3, v2 +; CHECK-P8-NEXT: li r5, 32 ; CHECK-P8-NEXT: vperm v4, v3, v3, v4 ; CHECK-P8-NEXT: vperm v5, v3, v3, v5 ; CHECK-P8-NEXT: vperm v3, v3, v3, v0 @@ -572,14 +565,13 @@ define void @test8elt_signed(<8 x double>* noalias nocapture sret %agg.result, i ; ; CHECK-P9-LABEL: test8elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mtfprd f0, r4 +; CHECK-P9-NEXT: mtvsrd v2, r4 ; CHECK-P9-NEXT: addis r4, r2, .LCPI6_0@toc@ha ; CHECK-P9-NEXT: addi r4, r4, .LCPI6_0@toc@l ; CHECK-P9-NEXT: lxvx v3, 0, r4 -; CHECK-P9-NEXT: xxswapd v2, vs0 -; CHECK-P9-NEXT: vperm v3, v2, v2, v3 ; CHECK-P9-NEXT: addis r4, r2, .LCPI6_1@toc@ha ; CHECK-P9-NEXT: addi r4, r4, .LCPI6_1@toc@l +; CHECK-P9-NEXT: vperm v3, v2, v2, v3 ; 
CHECK-P9-NEXT: vextsb2d v3, v3 ; CHECK-P9-NEXT: xvcvsxddp vs0, v3 ; CHECK-P9-NEXT: lxvx v3, 0, r4 diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll index 7e51f2b862ab..29955dc17f67 100644 --- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll @@ -82,10 +82,10 @@ define <3 x float> @constrained_vector_fdiv_v3f32() #0 { ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: xscvdpspn 2, 2 ; PC64LE-NEXT: xscvdpspn 0, 0 -; PC64LE-NEXT: xxsldwi 34, 1, 1, 1 -; PC64LE-NEXT: xxsldwi 35, 2, 2, 1 -; PC64LE-NEXT: vmrglw 2, 3, 2 -; PC64LE-NEXT: xxsldwi 35, 0, 0, 1 +; PC64LE-NEXT: xxsldwi 34, 1, 1, 3 +; PC64LE-NEXT: xxsldwi 35, 2, 2, 3 +; PC64LE-NEXT: vmrghw 2, 3, 2 +; PC64LE-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE-NEXT: vperm 2, 3, 2, 4 ; PC64LE-NEXT: blr ; @@ -106,12 +106,12 @@ define <3 x float> @constrained_vector_fdiv_v3f32() #0 { ; PC64LE9-NEXT: xsdivsp 2, 2, 0 ; PC64LE9-NEXT: xsdivsp 0, 3, 0 ; PC64LE9-NEXT: xscvdpspn 0, 0 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 2 -; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: vmrglw 2, 3, 2 -; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1 +; PC64LE9-NEXT: vmrghw 2, 3, 2 +; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: vperm 2, 3, 2, 4 ; PC64LE9-NEXT: blr entry: @@ -359,11 +359,11 @@ define <3 x float> @constrained_vector_frem_v3f32() #0 { ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: addi 3, 3, .LCPI7_4@toc@l ; PC64LE-NEXT: lvx 4, 0, 3 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE-NEXT: xscvdpspn 0, 30 -; PC64LE-NEXT: xxsldwi 35, 1, 1, 1 -; PC64LE-NEXT: vmrglw 2, 2, 3 -; PC64LE-NEXT: xxsldwi 35, 0, 0, 1 +; PC64LE-NEXT: xxsldwi 35, 1, 1, 3 +; PC64LE-NEXT: vmrghw 2, 2, 3 +; PC64LE-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE-NEXT: vperm 
2, 3, 2, 4 ; PC64LE-NEXT: addi 1, 1, 64 ; PC64LE-NEXT: ld 0, 16(1) @@ -401,15 +401,15 @@ define <3 x float> @constrained_vector_frem_v3f32() #0 { ; PC64LE9-NEXT: bl fmodf ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 29 -; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 30 ; PC64LE9-NEXT: addis 3, 2, .LCPI7_4@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI7_4@toc@l ; PC64LE9-NEXT: lxvx 36, 0, 3 -; PC64LE9-NEXT: vmrglw 2, 3, 2 -; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1 +; PC64LE9-NEXT: vmrghw 2, 3, 2 +; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: vperm 2, 3, 2, 4 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) @@ -710,10 +710,10 @@ define <3 x float> @constrained_vector_fmul_v3f32() #0 { ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: xscvdpspn 2, 2 ; PC64LE-NEXT: xscvdpspn 0, 0 -; PC64LE-NEXT: xxsldwi 34, 1, 1, 1 -; PC64LE-NEXT: xxsldwi 35, 2, 2, 1 -; PC64LE-NEXT: vmrglw 2, 3, 2 -; PC64LE-NEXT: xxsldwi 35, 0, 0, 1 +; PC64LE-NEXT: xxsldwi 34, 1, 1, 3 +; PC64LE-NEXT: xxsldwi 35, 2, 2, 3 +; PC64LE-NEXT: vmrghw 2, 3, 2 +; PC64LE-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE-NEXT: vperm 2, 3, 2, 4 ; PC64LE-NEXT: blr ; @@ -735,11 +735,11 @@ define <3 x float> @constrained_vector_fmul_v3f32() #0 { ; PC64LE9-NEXT: xsmulsp 1, 1, 3 ; PC64LE9-NEXT: xscvdpspn 0, 0 ; PC64LE9-NEXT: xscvdpspn 1, 1 -; PC64LE9-NEXT: xxsldwi 34, 1, 1, 1 +; PC64LE9-NEXT: xxsldwi 34, 1, 1, 3 ; PC64LE9-NEXT: xscvdpspn 1, 2 -; PC64LE9-NEXT: xxsldwi 35, 1, 1, 1 -; PC64LE9-NEXT: vmrglw 2, 3, 2 -; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 35, 1, 1, 3 +; PC64LE9-NEXT: vmrghw 2, 3, 2 +; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: vperm 2, 3, 2, 4 ; PC64LE9-NEXT: blr entry: @@ -925,10 +925,10 @@ define <3 x float> @constrained_vector_fadd_v3f32() #0 { ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: xscvdpspn 2, 2 ; PC64LE-NEXT: xscvdpspn 0, 0 
-; PC64LE-NEXT: xxsldwi 34, 1, 1, 1 -; PC64LE-NEXT: xxsldwi 35, 2, 2, 1 -; PC64LE-NEXT: vmrglw 2, 3, 2 -; PC64LE-NEXT: xxsldwi 35, 0, 0, 1 +; PC64LE-NEXT: xxsldwi 34, 1, 1, 3 +; PC64LE-NEXT: xxsldwi 35, 2, 2, 3 +; PC64LE-NEXT: vmrghw 2, 3, 2 +; PC64LE-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE-NEXT: vperm 2, 3, 2, 4 ; PC64LE-NEXT: blr ; @@ -945,15 +945,15 @@ define <3 x float> @constrained_vector_fadd_v3f32() #0 { ; PC64LE9-NEXT: xsaddsp 1, 0, 1 ; PC64LE9-NEXT: xsaddsp 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 0 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 2 -; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 1 ; PC64LE9-NEXT: addis 3, 2, .LCPI17_3@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI17_3@toc@l ; PC64LE9-NEXT: lxvx 36, 0, 3 -; PC64LE9-NEXT: vmrglw 2, 3, 2 -; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1 +; PC64LE9-NEXT: vmrghw 2, 3, 2 +; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: vperm 2, 3, 2, 4 ; PC64LE9-NEXT: blr entry: @@ -1137,10 +1137,10 @@ define <3 x float> @constrained_vector_fsub_v3f32() #0 { ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: xscvdpspn 2, 2 ; PC64LE-NEXT: xscvdpspn 0, 0 -; PC64LE-NEXT: xxsldwi 34, 1, 1, 1 -; PC64LE-NEXT: xxsldwi 35, 2, 2, 1 -; PC64LE-NEXT: vmrglw 2, 3, 2 -; PC64LE-NEXT: xxsldwi 35, 0, 0, 1 +; PC64LE-NEXT: xxsldwi 34, 1, 1, 3 +; PC64LE-NEXT: xxsldwi 35, 2, 2, 3 +; PC64LE-NEXT: vmrghw 2, 3, 2 +; PC64LE-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE-NEXT: vperm 2, 3, 2, 4 ; PC64LE-NEXT: blr ; @@ -1157,15 +1157,15 @@ define <3 x float> @constrained_vector_fsub_v3f32() #0 { ; PC64LE9-NEXT: xssubsp 1, 0, 1 ; PC64LE9-NEXT: xssubsp 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 0 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 2 -; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 1 ; PC64LE9-NEXT: addis 3, 2, .LCPI22_3@toc@ha ; PC64LE9-NEXT: addi 3, 3, 
.LCPI22_3@toc@l ; PC64LE9-NEXT: lxvx 36, 0, 3 -; PC64LE9-NEXT: vmrglw 2, 3, 2 -; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1 +; PC64LE9-NEXT: vmrghw 2, 3, 2 +; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: vperm 2, 3, 2, 4 ; PC64LE9-NEXT: blr entry: @@ -1333,12 +1333,12 @@ define <3 x float> @constrained_vector_sqrt_v3f32() #0 { ; PC64LE-NEXT: xssqrtsp 2, 2 ; PC64LE-NEXT: xscvdpspn 0, 0 ; PC64LE-NEXT: xscvdpspn 1, 1 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE-NEXT: xscvdpspn 0, 2 -; PC64LE-NEXT: xxsldwi 35, 1, 1, 1 -; PC64LE-NEXT: vmrglw 2, 3, 2 +; PC64LE-NEXT: xxsldwi 35, 1, 1, 3 +; PC64LE-NEXT: vmrghw 2, 3, 2 ; PC64LE-NEXT: lvx 3, 0, 3 -; PC64LE-NEXT: xxsldwi 36, 0, 0, 1 +; PC64LE-NEXT: xxsldwi 36, 0, 0, 3 ; PC64LE-NEXT: vperm 2, 4, 2, 3 ; PC64LE-NEXT: blr ; @@ -1358,10 +1358,10 @@ define <3 x float> @constrained_vector_sqrt_v3f32() #0 { ; PC64LE9-NEXT: xscvdpspn 0, 0 ; PC64LE9-NEXT: xscvdpspn 1, 1 ; PC64LE9-NEXT: xscvdpspn 2, 2 -; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1 -; PC64LE9-NEXT: xxsldwi 35, 1, 1, 1 -; PC64LE9-NEXT: xxsldwi 34, 2, 2, 1 -; PC64LE9-NEXT: vmrglw 2, 3, 2 +; PC64LE9-NEXT: xxsldwi 36, 0, 0, 3 +; PC64LE9-NEXT: xxsldwi 35, 1, 1, 3 +; PC64LE9-NEXT: xxsldwi 34, 2, 2, 3 +; PC64LE9-NEXT: vmrghw 2, 3, 2 ; PC64LE9-NEXT: lxvx 35, 0, 3 ; PC64LE9-NEXT: vperm 2, 4, 2, 3 ; PC64LE9-NEXT: blr @@ -1588,11 +1588,11 @@ define <3 x float> @constrained_vector_pow_v3f32() #0 { ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: addi 3, 3, .LCPI32_4@toc@l ; PC64LE-NEXT: lvx 4, 0, 3 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE-NEXT: xscvdpspn 0, 30 -; PC64LE-NEXT: xxsldwi 35, 1, 1, 1 -; PC64LE-NEXT: vmrglw 2, 2, 3 -; PC64LE-NEXT: xxsldwi 35, 0, 0, 1 +; PC64LE-NEXT: xxsldwi 35, 1, 1, 3 +; PC64LE-NEXT: vmrghw 2, 2, 3 +; PC64LE-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE-NEXT: vperm 2, 3, 2, 4 ; PC64LE-NEXT: addi 1, 1, 64 ; PC64LE-NEXT: ld 0, 16(1) @@ -1630,15 +1630,15 @@ define <3 x float> @constrained_vector_pow_v3f32() #0 
{ ; PC64LE9-NEXT: bl powf ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 29 -; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 30 ; PC64LE9-NEXT: addis 3, 2, .LCPI32_4@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI32_4@toc@l ; PC64LE9-NEXT: lxvx 36, 0, 3 -; PC64LE9-NEXT: vmrglw 2, 3, 2 -; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1 +; PC64LE9-NEXT: vmrghw 2, 3, 2 +; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: vperm 2, 3, 2, 4 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) @@ -1992,11 +1992,11 @@ define <3 x float> @constrained_vector_powi_v3f32() #0 { ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: addi 3, 3, .LCPI37_3@toc@l ; PC64LE-NEXT: lvx 4, 0, 3 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE-NEXT: xscvdpspn 0, 31 -; PC64LE-NEXT: xxsldwi 35, 1, 1, 1 -; PC64LE-NEXT: vmrglw 2, 2, 3 -; PC64LE-NEXT: xxsldwi 35, 0, 0, 1 +; PC64LE-NEXT: xxsldwi 35, 1, 1, 3 +; PC64LE-NEXT: vmrghw 2, 2, 3 +; PC64LE-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE-NEXT: vperm 2, 3, 2, 4 ; PC64LE-NEXT: addi 1, 1, 48 ; PC64LE-NEXT: ld 0, 16(1) @@ -2030,15 +2030,15 @@ define <3 x float> @constrained_vector_powi_v3f32() #0 { ; PC64LE9-NEXT: bl __powisf2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 30 -; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 31 ; PC64LE9-NEXT: addis 3, 2, .LCPI37_3@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI37_3@toc@l ; PC64LE9-NEXT: lxvx 36, 0, 3 -; PC64LE9-NEXT: vmrglw 2, 3, 2 -; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1 +; PC64LE9-NEXT: vmrghw 2, 3, 2 +; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: vperm 2, 3, 2, 4 ; PC64LE9-NEXT: addi 1, 1, 48 ; PC64LE9-NEXT: ld 0, 16(1) @@ -2360,12 +2360,12 @@ define <3 x float> 
@constrained_vector_sin_v3f32() #0 { ; PC64LE-NEXT: addis 3, 2, .LCPI42_3@toc@ha ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: addi 3, 3, .LCPI42_3@toc@l -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE-NEXT: xscvdpspn 0, 31 -; PC64LE-NEXT: xxsldwi 35, 1, 1, 1 -; PC64LE-NEXT: vmrglw 2, 2, 3 +; PC64LE-NEXT: xxsldwi 35, 1, 1, 3 +; PC64LE-NEXT: vmrghw 2, 2, 3 ; PC64LE-NEXT: lvx 3, 0, 3 -; PC64LE-NEXT: xxsldwi 36, 0, 0, 1 +; PC64LE-NEXT: xxsldwi 36, 0, 0, 3 ; PC64LE-NEXT: vperm 2, 4, 2, 3 ; PC64LE-NEXT: addi 1, 1, 48 ; PC64LE-NEXT: ld 0, 16(1) @@ -2396,15 +2396,15 @@ define <3 x float> @constrained_vector_sin_v3f32() #0 { ; PC64LE9-NEXT: bl sinf ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 30 -; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 31 ; PC64LE9-NEXT: addis 3, 2, .LCPI42_3@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI42_3@toc@l -; PC64LE9-NEXT: vmrglw 2, 3, 2 +; PC64LE9-NEXT: vmrghw 2, 3, 2 ; PC64LE9-NEXT: lxvx 35, 0, 3 -; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 36, 0, 0, 3 ; PC64LE9-NEXT: vperm 2, 4, 2, 3 ; PC64LE9-NEXT: addi 1, 1, 48 ; PC64LE9-NEXT: ld 0, 16(1) @@ -2709,12 +2709,12 @@ define <3 x float> @constrained_vector_cos_v3f32() #0 { ; PC64LE-NEXT: addis 3, 2, .LCPI47_3@toc@ha ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: addi 3, 3, .LCPI47_3@toc@l -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE-NEXT: xscvdpspn 0, 31 -; PC64LE-NEXT: xxsldwi 35, 1, 1, 1 -; PC64LE-NEXT: vmrglw 2, 2, 3 +; PC64LE-NEXT: xxsldwi 35, 1, 1, 3 +; PC64LE-NEXT: vmrghw 2, 2, 3 ; PC64LE-NEXT: lvx 3, 0, 3 -; PC64LE-NEXT: xxsldwi 36, 0, 0, 1 +; PC64LE-NEXT: xxsldwi 36, 0, 0, 3 ; PC64LE-NEXT: vperm 2, 4, 2, 3 ; PC64LE-NEXT: addi 1, 1, 48 ; PC64LE-NEXT: ld 0, 16(1) @@ -2745,15 +2745,15 @@ define <3 x float> @constrained_vector_cos_v3f32() #0 { ; 
PC64LE9-NEXT: bl cosf ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 30 -; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 31 ; PC64LE9-NEXT: addis 3, 2, .LCPI47_3@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI47_3@toc@l -; PC64LE9-NEXT: vmrglw 2, 3, 2 +; PC64LE9-NEXT: vmrghw 2, 3, 2 ; PC64LE9-NEXT: lxvx 35, 0, 3 -; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 36, 0, 0, 3 ; PC64LE9-NEXT: vperm 2, 4, 2, 3 ; PC64LE9-NEXT: addi 1, 1, 48 ; PC64LE9-NEXT: ld 0, 16(1) @@ -3058,12 +3058,12 @@ define <3 x float> @constrained_vector_exp_v3f32() #0 { ; PC64LE-NEXT: addis 3, 2, .LCPI52_3@toc@ha ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: addi 3, 3, .LCPI52_3@toc@l -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE-NEXT: xscvdpspn 0, 31 -; PC64LE-NEXT: xxsldwi 35, 1, 1, 1 -; PC64LE-NEXT: vmrglw 2, 2, 3 +; PC64LE-NEXT: xxsldwi 35, 1, 1, 3 +; PC64LE-NEXT: vmrghw 2, 2, 3 ; PC64LE-NEXT: lvx 3, 0, 3 -; PC64LE-NEXT: xxsldwi 36, 0, 0, 1 +; PC64LE-NEXT: xxsldwi 36, 0, 0, 3 ; PC64LE-NEXT: vperm 2, 4, 2, 3 ; PC64LE-NEXT: addi 1, 1, 48 ; PC64LE-NEXT: ld 0, 16(1) @@ -3094,15 +3094,15 @@ define <3 x float> @constrained_vector_exp_v3f32() #0 { ; PC64LE9-NEXT: bl expf ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 30 -; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 31 ; PC64LE9-NEXT: addis 3, 2, .LCPI52_3@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI52_3@toc@l -; PC64LE9-NEXT: vmrglw 2, 3, 2 +; PC64LE9-NEXT: vmrghw 2, 3, 2 ; PC64LE9-NEXT: lxvx 35, 0, 3 -; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 36, 0, 0, 3 ; PC64LE9-NEXT: vperm 2, 4, 2, 3 ; PC64LE9-NEXT: addi 1, 1, 48 ; PC64LE9-NEXT: ld 0, 16(1) @@ -3407,12 +3407,12 @@ define <3 
x float> @constrained_vector_exp2_v3f32() #0 { ; PC64LE-NEXT: addis 3, 2, .LCPI57_3@toc@ha ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: addi 3, 3, .LCPI57_3@toc@l -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE-NEXT: xscvdpspn 0, 31 -; PC64LE-NEXT: xxsldwi 35, 1, 1, 1 -; PC64LE-NEXT: vmrglw 2, 2, 3 +; PC64LE-NEXT: xxsldwi 35, 1, 1, 3 +; PC64LE-NEXT: vmrghw 2, 2, 3 ; PC64LE-NEXT: lvx 3, 0, 3 -; PC64LE-NEXT: xxsldwi 36, 0, 0, 1 +; PC64LE-NEXT: xxsldwi 36, 0, 0, 3 ; PC64LE-NEXT: vperm 2, 4, 2, 3 ; PC64LE-NEXT: addi 1, 1, 48 ; PC64LE-NEXT: ld 0, 16(1) @@ -3443,15 +3443,15 @@ define <3 x float> @constrained_vector_exp2_v3f32() #0 { ; PC64LE9-NEXT: bl exp2f ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 30 -; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 31 ; PC64LE9-NEXT: addis 3, 2, .LCPI57_3@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI57_3@toc@l -; PC64LE9-NEXT: vmrglw 2, 3, 2 +; PC64LE9-NEXT: vmrghw 2, 3, 2 ; PC64LE9-NEXT: lxvx 35, 0, 3 -; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 36, 0, 0, 3 ; PC64LE9-NEXT: vperm 2, 4, 2, 3 ; PC64LE9-NEXT: addi 1, 1, 48 ; PC64LE9-NEXT: ld 0, 16(1) @@ -3756,12 +3756,12 @@ define <3 x float> @constrained_vector_log_v3f32() #0 { ; PC64LE-NEXT: addis 3, 2, .LCPI62_3@toc@ha ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: addi 3, 3, .LCPI62_3@toc@l -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE-NEXT: xscvdpspn 0, 31 -; PC64LE-NEXT: xxsldwi 35, 1, 1, 1 -; PC64LE-NEXT: vmrglw 2, 2, 3 +; PC64LE-NEXT: xxsldwi 35, 1, 1, 3 +; PC64LE-NEXT: vmrghw 2, 2, 3 ; PC64LE-NEXT: lvx 3, 0, 3 -; PC64LE-NEXT: xxsldwi 36, 0, 0, 1 +; PC64LE-NEXT: xxsldwi 36, 0, 0, 3 ; PC64LE-NEXT: vperm 2, 4, 2, 3 ; PC64LE-NEXT: addi 1, 1, 48 ; PC64LE-NEXT: ld 0, 16(1) @@ -3792,15 +3792,15 @@ define <3 x float> @constrained_vector_log_v3f32() 
#0 { ; PC64LE9-NEXT: bl logf ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 30 -; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 31 ; PC64LE9-NEXT: addis 3, 2, .LCPI62_3@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI62_3@toc@l -; PC64LE9-NEXT: vmrglw 2, 3, 2 +; PC64LE9-NEXT: vmrghw 2, 3, 2 ; PC64LE9-NEXT: lxvx 35, 0, 3 -; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 36, 0, 0, 3 ; PC64LE9-NEXT: vperm 2, 4, 2, 3 ; PC64LE9-NEXT: addi 1, 1, 48 ; PC64LE9-NEXT: ld 0, 16(1) @@ -4105,12 +4105,12 @@ define <3 x float> @constrained_vector_log10_v3f32() #0 { ; PC64LE-NEXT: addis 3, 2, .LCPI67_3@toc@ha ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: addi 3, 3, .LCPI67_3@toc@l -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE-NEXT: xscvdpspn 0, 31 -; PC64LE-NEXT: xxsldwi 35, 1, 1, 1 -; PC64LE-NEXT: vmrglw 2, 2, 3 +; PC64LE-NEXT: xxsldwi 35, 1, 1, 3 +; PC64LE-NEXT: vmrghw 2, 2, 3 ; PC64LE-NEXT: lvx 3, 0, 3 -; PC64LE-NEXT: xxsldwi 36, 0, 0, 1 +; PC64LE-NEXT: xxsldwi 36, 0, 0, 3 ; PC64LE-NEXT: vperm 2, 4, 2, 3 ; PC64LE-NEXT: addi 1, 1, 48 ; PC64LE-NEXT: ld 0, 16(1) @@ -4141,15 +4141,15 @@ define <3 x float> @constrained_vector_log10_v3f32() #0 { ; PC64LE9-NEXT: bl log10f ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 30 -; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 31 ; PC64LE9-NEXT: addis 3, 2, .LCPI67_3@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI67_3@toc@l -; PC64LE9-NEXT: vmrglw 2, 3, 2 +; PC64LE9-NEXT: vmrghw 2, 3, 2 ; PC64LE9-NEXT: lxvx 35, 0, 3 -; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 36, 0, 0, 3 ; PC64LE9-NEXT: vperm 2, 4, 2, 3 ; PC64LE9-NEXT: addi 1, 1, 48 ; PC64LE9-NEXT: ld 0, 16(1) @@ -4454,12 +4454,12 
@@ define <3 x float> @constrained_vector_log2_v3f32() #0 { ; PC64LE-NEXT: addis 3, 2, .LCPI72_3@toc@ha ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: addi 3, 3, .LCPI72_3@toc@l -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE-NEXT: xscvdpspn 0, 31 -; PC64LE-NEXT: xxsldwi 35, 1, 1, 1 -; PC64LE-NEXT: vmrglw 2, 2, 3 +; PC64LE-NEXT: xxsldwi 35, 1, 1, 3 +; PC64LE-NEXT: vmrghw 2, 2, 3 ; PC64LE-NEXT: lvx 3, 0, 3 -; PC64LE-NEXT: xxsldwi 36, 0, 0, 1 +; PC64LE-NEXT: xxsldwi 36, 0, 0, 3 ; PC64LE-NEXT: vperm 2, 4, 2, 3 ; PC64LE-NEXT: addi 1, 1, 48 ; PC64LE-NEXT: ld 0, 16(1) @@ -4490,15 +4490,15 @@ define <3 x float> @constrained_vector_log2_v3f32() #0 { ; PC64LE9-NEXT: bl log2f ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 30 -; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 31 ; PC64LE9-NEXT: addis 3, 2, .LCPI72_3@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI72_3@toc@l -; PC64LE9-NEXT: vmrglw 2, 3, 2 +; PC64LE9-NEXT: vmrghw 2, 3, 2 ; PC64LE9-NEXT: lxvx 35, 0, 3 -; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 36, 0, 0, 3 ; PC64LE9-NEXT: vperm 2, 4, 2, 3 ; PC64LE9-NEXT: addi 1, 1, 48 ; PC64LE9-NEXT: ld 0, 16(1) @@ -4748,12 +4748,12 @@ define <3 x float> @constrained_vector_rint_v3f32() #0 { ; PC64LE-NEXT: xsrdpic 2, 2 ; PC64LE-NEXT: xscvdpspn 0, 0 ; PC64LE-NEXT: xscvdpspn 1, 1 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE-NEXT: xscvdpspn 0, 2 -; PC64LE-NEXT: xxsldwi 35, 1, 1, 1 -; PC64LE-NEXT: vmrglw 2, 3, 2 +; PC64LE-NEXT: xxsldwi 35, 1, 1, 3 +; PC64LE-NEXT: vmrghw 2, 3, 2 ; PC64LE-NEXT: lvx 3, 0, 3 -; PC64LE-NEXT: xxsldwi 36, 0, 0, 1 +; PC64LE-NEXT: xxsldwi 36, 0, 0, 3 ; PC64LE-NEXT: vperm 2, 4, 2, 3 ; PC64LE-NEXT: blr ; @@ -4773,10 +4773,10 @@ define <3 x float> @constrained_vector_rint_v3f32() #0 { ; PC64LE9-NEXT: xscvdpspn 0, 0 ; 
PC64LE9-NEXT: xscvdpspn 1, 1 ; PC64LE9-NEXT: xscvdpspn 2, 2 -; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1 -; PC64LE9-NEXT: xxsldwi 35, 1, 1, 1 -; PC64LE9-NEXT: xxsldwi 34, 2, 2, 1 -; PC64LE9-NEXT: vmrglw 2, 3, 2 +; PC64LE9-NEXT: xxsldwi 36, 0, 0, 3 +; PC64LE9-NEXT: xxsldwi 35, 1, 1, 3 +; PC64LE9-NEXT: xxsldwi 34, 2, 2, 3 +; PC64LE9-NEXT: vmrghw 2, 3, 2 ; PC64LE9-NEXT: lxvx 35, 0, 3 ; PC64LE9-NEXT: vperm 2, 4, 2, 3 ; PC64LE9-NEXT: blr @@ -4947,12 +4947,12 @@ define <3 x float> @constrained_vector_nearbyint_v3f32() #0 { ; PC64LE-NEXT: addis 3, 2, .LCPI82_3@toc@ha ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: addi 3, 3, .LCPI82_3@toc@l -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE-NEXT: xscvdpspn 0, 31 -; PC64LE-NEXT: xxsldwi 35, 1, 1, 1 -; PC64LE-NEXT: vmrglw 2, 2, 3 +; PC64LE-NEXT: xxsldwi 35, 1, 1, 3 +; PC64LE-NEXT: vmrghw 2, 2, 3 ; PC64LE-NEXT: lvx 3, 0, 3 -; PC64LE-NEXT: xxsldwi 36, 0, 0, 1 +; PC64LE-NEXT: xxsldwi 36, 0, 0, 3 ; PC64LE-NEXT: vperm 2, 4, 2, 3 ; PC64LE-NEXT: addi 1, 1, 48 ; PC64LE-NEXT: ld 0, 16(1) @@ -4983,15 +4983,15 @@ define <3 x float> @constrained_vector_nearbyint_v3f32() #0 { ; PC64LE9-NEXT: bl nearbyintf ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 30 -; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 31 ; PC64LE9-NEXT: addis 3, 2, .LCPI82_3@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI82_3@toc@l -; PC64LE9-NEXT: vmrglw 2, 3, 2 +; PC64LE9-NEXT: vmrghw 2, 3, 2 ; PC64LE9-NEXT: lxvx 35, 0, 3 -; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 36, 0, 0, 3 ; PC64LE9-NEXT: vperm 2, 4, 2, 3 ; PC64LE9-NEXT: addi 1, 1, 48 ; PC64LE9-NEXT: ld 0, 16(1) @@ -5184,11 +5184,11 @@ define <3 x float> @constrained_vector_maxnum_v3f32() #0 { ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: addi 3, 3, .LCPI87_5@toc@l ; PC64LE-NEXT: lvx 4, 0, 3 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 
+; PC64LE-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE-NEXT: xscvdpspn 0, 30 -; PC64LE-NEXT: xxsldwi 35, 1, 1, 1 -; PC64LE-NEXT: vmrglw 2, 2, 3 -; PC64LE-NEXT: xxsldwi 35, 0, 0, 1 +; PC64LE-NEXT: xxsldwi 35, 1, 1, 3 +; PC64LE-NEXT: vmrghw 2, 2, 3 +; PC64LE-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE-NEXT: vperm 2, 3, 2, 4 ; PC64LE-NEXT: addi 1, 1, 64 ; PC64LE-NEXT: ld 0, 16(1) @@ -5227,15 +5227,15 @@ define <3 x float> @constrained_vector_maxnum_v3f32() #0 { ; PC64LE9-NEXT: bl fmaxf ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 29 -; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 30 ; PC64LE9-NEXT: addis 3, 2, .LCPI87_5@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI87_5@toc@l ; PC64LE9-NEXT: lxvx 36, 0, 3 -; PC64LE9-NEXT: vmrglw 2, 3, 2 -; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1 +; PC64LE9-NEXT: vmrghw 2, 3, 2 +; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: vperm 2, 3, 2, 4 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) @@ -5471,11 +5471,11 @@ define <3 x float> @constrained_vector_minnum_v3f32() #0 { ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: addi 3, 3, .LCPI92_5@toc@l ; PC64LE-NEXT: lvx 4, 0, 3 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE-NEXT: xscvdpspn 0, 30 -; PC64LE-NEXT: xxsldwi 35, 1, 1, 1 -; PC64LE-NEXT: vmrglw 2, 2, 3 -; PC64LE-NEXT: xxsldwi 35, 0, 0, 1 +; PC64LE-NEXT: xxsldwi 35, 1, 1, 3 +; PC64LE-NEXT: vmrghw 2, 2, 3 +; PC64LE-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE-NEXT: vperm 2, 3, 2, 4 ; PC64LE-NEXT: addi 1, 1, 64 ; PC64LE-NEXT: ld 0, 16(1) @@ -5514,15 +5514,15 @@ define <3 x float> @constrained_vector_minnum_v3f32() #0 { ; PC64LE9-NEXT: bl fminf ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 29 -; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; 
PC64LE9-NEXT: xscvdpspn 0, 30 ; PC64LE9-NEXT: addis 3, 2, .LCPI92_5@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI92_5@toc@l ; PC64LE9-NEXT: lxvx 36, 0, 3 -; PC64LE9-NEXT: vmrglw 2, 3, 2 -; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1 +; PC64LE9-NEXT: vmrghw 2, 3, 2 +; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: vperm 2, 3, 2, 4 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) @@ -5686,9 +5686,9 @@ define <2 x float> @constrained_vector_fptrunc_v2f64() #0 { ; PC64LE-NEXT: xsrsp 1, 1 ; PC64LE-NEXT: xscvdpspn 0, 0 ; PC64LE-NEXT: xscvdpspn 1, 1 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 -; PC64LE-NEXT: xxsldwi 35, 1, 1, 1 -; PC64LE-NEXT: vmrglw 2, 3, 2 +; PC64LE-NEXT: xxsldwi 34, 0, 0, 3 +; PC64LE-NEXT: xxsldwi 35, 1, 1, 3 +; PC64LE-NEXT: vmrghw 2, 3, 2 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_fptrunc_v2f64: @@ -5698,12 +5698,12 @@ define <2 x float> @constrained_vector_fptrunc_v2f64() #0 { ; PC64LE9-NEXT: addis 3, 2, .LCPI96_1@toc@ha ; PC64LE9-NEXT: xsrsp 0, 0 ; PC64LE9-NEXT: xscvdpspn 0, 0 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE9-NEXT: lfd 0, .LCPI96_1@toc@l(3) ; PC64LE9-NEXT: xsrsp 0, 0 ; PC64LE9-NEXT: xscvdpspn 0, 0 -; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1 -; PC64LE9-NEXT: vmrglw 2, 3, 2 +; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 +; PC64LE9-NEXT: vmrghw 2, 3, 2 ; PC64LE9-NEXT: blr entry: %result = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64( @@ -5729,12 +5729,12 @@ define <3 x float> @constrained_vector_fptrunc_v3f64() #0 { ; PC64LE-NEXT: xsrsp 2, 2 ; PC64LE-NEXT: xscvdpspn 0, 0 ; PC64LE-NEXT: xscvdpspn 1, 1 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE-NEXT: xscvdpspn 0, 2 -; PC64LE-NEXT: xxsldwi 35, 1, 1, 1 -; PC64LE-NEXT: vmrglw 2, 3, 2 +; PC64LE-NEXT: xxsldwi 35, 1, 1, 3 +; PC64LE-NEXT: vmrghw 2, 3, 2 ; PC64LE-NEXT: lvx 3, 0, 3 -; PC64LE-NEXT: xxsldwi 36, 0, 0, 1 +; PC64LE-NEXT: xxsldwi 36, 0, 0, 3 ; PC64LE-NEXT: vperm 2, 4, 2, 3 ; PC64LE-NEXT: blr ; @@ -5745,20 
+5745,20 @@ define <3 x float> @constrained_vector_fptrunc_v3f64() #0 { ; PC64LE9-NEXT: addis 3, 2, .LCPI97_1@toc@ha ; PC64LE9-NEXT: xsrsp 0, 0 ; PC64LE9-NEXT: xscvdpspn 0, 0 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE9-NEXT: lfd 0, .LCPI97_1@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI97_2@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI97_2@toc@l ; PC64LE9-NEXT: xsrsp 0, 0 ; PC64LE9-NEXT: xscvdpspn 0, 0 -; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1 -; PC64LE9-NEXT: vmrglw 2, 3, 2 +; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 +; PC64LE9-NEXT: vmrghw 2, 3, 2 ; PC64LE9-NEXT: lxvx 35, 0, 3 ; PC64LE9-NEXT: addis 3, 2, .LCPI97_3@toc@ha ; PC64LE9-NEXT: lfd 0, .LCPI97_3@toc@l(3) ; PC64LE9-NEXT: xsrsp 0, 0 ; PC64LE9-NEXT: xscvdpspn 0, 0 -; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 36, 0, 0, 3 ; PC64LE9-NEXT: vperm 2, 4, 2, 3 ; PC64LE9-NEXT: blr entry: diff --git a/llvm/test/CodeGen/PowerPC/vsx.ll b/llvm/test/CodeGen/PowerPC/vsx.ll index 8b4e3640ef6b..4a78218262ca 100644 --- a/llvm/test/CodeGen/PowerPC/vsx.ll +++ b/llvm/test/CodeGen/PowerPC/vsx.ll @@ -1404,9 +1404,9 @@ define <2 x float> @test44(<2 x i64> %a) { ; CHECK-LE-NEXT: xscvuxdsp f0, f0 ; CHECK-LE-NEXT: xscvdpspn vs1, f1 ; CHECK-LE-NEXT: xscvdpspn vs0, f0 -; CHECK-LE-NEXT: xxsldwi v3, vs1, vs1, 1 -; CHECK-LE-NEXT: xxsldwi v2, vs0, vs0, 1 -; CHECK-LE-NEXT: vmrglw v2, v3, v2 +; CHECK-LE-NEXT: xxsldwi v3, vs1, vs1, 3 +; CHECK-LE-NEXT: xxsldwi v2, vs0, vs0, 3 +; CHECK-LE-NEXT: vmrghw v2, v3, v2 ; CHECK-LE-NEXT: blr %v = uitofp <2 x i64> %a to <2 x float> ret <2 x float> %v @@ -1486,9 +1486,9 @@ define <2 x float> @test45(<2 x i64> %a) { ; CHECK-LE-NEXT: xscvsxdsp f0, f0 ; CHECK-LE-NEXT: xscvdpspn vs1, f1 ; CHECK-LE-NEXT: xscvdpspn vs0, f0 -; CHECK-LE-NEXT: xxsldwi v3, vs1, vs1, 1 -; CHECK-LE-NEXT: xxsldwi v2, vs0, vs0, 1 -; CHECK-LE-NEXT: vmrglw v2, v3, v2 +; CHECK-LE-NEXT: xxsldwi v3, vs1, vs1, 3 +; CHECK-LE-NEXT: xxsldwi v2, vs0, vs0, 3 +; CHECK-LE-NEXT: vmrghw v2, v3, v2 ; CHECK-LE-NEXT: 
blr %v = sitofp <2 x i64> %a to <2 x float> ret <2 x float> %v @@ -2437,12 +2437,11 @@ define <2 x i32> @test80(i32 %v) { ; ; CHECK-LE-LABEL: test80: ; CHECK-LE: # %bb.0: -; CHECK-LE-NEXT: mtfprd f0, r3 +; CHECK-LE-NEXT: mtfprwz f0, r3 ; CHECK-LE-NEXT: addis r4, r2, .LCPI65_0@toc@ha ; CHECK-LE-NEXT: addi r3, r4, .LCPI65_0@toc@l -; CHECK-LE-NEXT: xxswapd vs0, vs0 +; CHECK-LE-NEXT: xxspltw v2, vs0, 1 ; CHECK-LE-NEXT: lvx v3, 0, r3 -; CHECK-LE-NEXT: xxspltw v2, vs0, 3 ; CHECK-LE-NEXT: vadduwm v2, v2, v3 ; CHECK-LE-NEXT: blr %b1 = insertelement <2 x i32> undef, i32 %v, i32 0 diff --git a/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll b/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll index 5c05f8dc3d81..a198604f79a4 100644 --- a/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll +++ b/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll @@ -17,17 +17,15 @@ define <2 x double> @testi0(<2 x double>* %p1, double* %p2) { ; CHECK-NEXT: lxvd2x vs0, 0, r3 ; CHECK-NEXT: lfdx f1, 0, r4 ; CHECK-NEXT: xxswapd vs0, vs0 -; CHECK-NEXT: xxspltd vs1, vs1, 0 -; CHECK-NEXT: xxpermdi v2, vs0, vs1, 1 +; CHECK-NEXT: xxmrghd v2, vs0, vs1 ; CHECK-NEXT: blr ; ; CHECK-P9-VECTOR-LABEL: testi0: ; CHECK-P9-VECTOR: # %bb.0: ; CHECK-P9-VECTOR-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P9-VECTOR-NEXT: lfdx f1, 0, r4 -; CHECK-P9-VECTOR-NEXT: xxspltd vs1, vs1, 0 ; CHECK-P9-VECTOR-NEXT: xxswapd vs0, vs0 -; CHECK-P9-VECTOR-NEXT: xxpermdi v2, vs0, vs1, 1 +; CHECK-P9-VECTOR-NEXT: xxmrghd v2, vs0, vs1 ; CHECK-P9-VECTOR-NEXT: blr ; ; CHECK-P9-LABEL: testi0: @@ -51,17 +49,15 @@ define <2 x double> @testi1(<2 x double>* %p1, double* %p2) { ; CHECK-NEXT: lxvd2x vs0, 0, r3 ; CHECK-NEXT: lfdx f1, 0, r4 ; CHECK-NEXT: xxswapd vs0, vs0 -; CHECK-NEXT: xxspltd vs1, vs1, 0 -; CHECK-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-NEXT: xxpermdi v2, vs1, vs0, 1 ; CHECK-NEXT: blr ; ; CHECK-P9-VECTOR-LABEL: testi1: ; CHECK-P9-VECTOR: # %bb.0: ; CHECK-P9-VECTOR-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P9-VECTOR-NEXT: lfdx f1, 0, r4 -; 
CHECK-P9-VECTOR-NEXT: xxspltd vs1, vs1, 0 ; CHECK-P9-VECTOR-NEXT: xxswapd vs0, vs0 -; CHECK-P9-VECTOR-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-P9-VECTOR-NEXT: xxpermdi v2, vs1, vs0, 1 ; CHECK-P9-VECTOR-NEXT: blr ; ; CHECK-P9-LABEL: testi1: