[PowerPC] Canonicalize shuffles to match more single-instruction masks on LE

We currently miss a number of opportunities to emit single-instruction
VMRG[LH][BHW] instructions for shuffles on little endian subtargets. Although
this in itself is not a huge performance opportunity since loading the permute
vector for a VPERM can always be pulled out of loops, producing such merge
instructions is useful to downstream optimizations.
Since VPERM is essentially opaque to all subsequent optimizations, we want to
avoid it as much as possible. Other permute instructions have semantics that can
be reasoned about much more easily in later optimizations.

This patch does the following:
- Canonicalize shuffles so that the first element comes from the first vector
  (since that's what most of the mask matching functions want)
- Switch the elements that come from splat vectors so that they match the
  corresponding elements from the other vector (to allow for merges)
- Add debugging messages for when a shuffle is matched to a VPERM so that
  anyone interested in improving this further can get the info for their code

Differential revision: https://reviews.llvm.org/D77448
This commit is contained in:
Nemanja Ivanovic 2020-06-18 21:53:50 -05:00
parent 8f3b2c8aa3
commit 1fed131660
35 changed files with 2893 additions and 3352 deletions

View File

@ -125,6 +125,7 @@ cl::desc("use absolute jump tables on ppc"), cl::Hidden);
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");
STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM");
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
@ -1505,6 +1506,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
case PPCISD::SCALAR_TO_VECTOR_PERMUTED:
return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
case PPCISD::ANDI_rec_1_EQ_BIT:
return "PPCISD::ANDI_rec_1_EQ_BIT";
case PPCISD::ANDI_rec_1_GT_BIT:
@ -2716,7 +2719,8 @@ static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
UI != UE; ++UI)
if (UI.getUse().get().getResNo() == 0 &&
UI->getOpcode() != ISD::SCALAR_TO_VECTOR)
UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&
UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
return false;
return true;
@ -9041,7 +9045,8 @@ static const SDValue *getNormalLoadInput(const SDValue &Op) {
const SDValue *InputLoad = &Op;
if (InputLoad->getOpcode() == ISD::BITCAST)
InputLoad = &InputLoad->getOperand(0);
if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR)
if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED)
InputLoad = &InputLoad->getOperand(0);
if (InputLoad->getOpcode() != ISD::LOAD)
return nullptr;
@ -9690,6 +9695,15 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
// Any nodes that were combined in the target-independent combiner prior
// to vector legalization will not be sent to the target combine. Try to
// combine it here.
if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
DAG.ReplaceAllUsesOfValueWith(Op, NewShuffle);
Op = NewShuffle;
SVOp = cast<ShuffleVectorSDNode>(Op);
}
EVT VT = Op.getValueType();
bool isLittleEndian = Subtarget.isLittleEndian();
@ -9715,6 +9729,11 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
else
Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
// If we are loading a partial vector, it does not make sense to adjust
// the base pointer. This happens with (splat (s_to_v_permuted (ld))).
if (LD->getMemoryVT().getSizeInBits() == (IsFourByte ? 32 : 64))
Offset = 0;
SDValue BasePtr = LD->getBasePtr();
if (Offset != 0)
BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
@ -9988,7 +10007,13 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
MVT::i32));
}
ShufflesHandledWithVPERM++;
SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
LLVM_DEBUG(dbgs() << "Emitting a VPERM for the following shuffle:\n");
LLVM_DEBUG(SVOp->dump());
LLVM_DEBUG(dbgs() << "With the following permute control vector:\n");
LLVM_DEBUG(VPermMask.dump());
if (isLittleEndian)
return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
V2, V1, VPermMask);
@ -14114,6 +14139,199 @@ SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
return Val;
}
// Return true if the mask alternates sources on every element, i.e. each
// consecutive pair of entries draws from opposite input vectors
// (Mask[i] < NumElts implies Mask[i+1] >= NumElts and vice versa).
static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
  // The expected source for element i+1 is always the opposite of the
  // actual source for element i.
  bool ExpectFromFirstVec = Mask[0] >= NumElts;
  for (int i = 1, e = Mask.size(); i < e; ++i) {
    bool FromFirstVec = Mask[i] < NumElts;
    if (FromFirstVec != ExpectFromFirstVec)
      return false;
    ExpectFromFirstVec = !ExpectFromFirstVec;
  }
  return true;
}
// Return true if Op is a BUILD_VECTOR whose non-undef operands are all the
// same value (i.e. it is a splat, possibly with undef lanes).
static bool isSplatBV(SDValue Op) {
  if (Op.getOpcode() != ISD::BUILD_VECTOR)
    return false;
  // Locate the first non-undef operand; it defines the splat value.
  SDValue SplatVal;
  for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
    SplatVal = Op.getOperand(i);
    if (!SplatVal.isUndef())
      break;
  }
  // Every remaining operand must be undef or identical to the splat value.
  for (int i = 1, e = Op.getNumOperands(); i < e; i++) {
    SDValue CurOp = Op.getOperand(i);
    if (!CurOp.isUndef() && CurOp != SplatVal)
      return false;
  }
  return true;
}
// If Op is a SCALAR_TO_VECTOR (possibly behind a single BITCAST), return the
// SCALAR_TO_VECTOR node itself; otherwise return an empty SDValue.
static SDValue isScalarToVec(SDValue Op) {
  // Look through at most one bitcast.
  if (Op.getOpcode() == ISD::BITCAST)
    Op = Op.getOperand(0);
  if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
    return Op;
  return SDValue();
}
// Shift every shuffle-mask entry that refers to element zero of a permuted
// SCALAR_TO_VECTOR input by HalfVec so it points at the element's actual
// (permuted) position. Entries in [0, LHSMaxIdx) refer to the LHS input and
// entries in [RHSMinIdx, RHSMaxIdx) to the RHS input; all others (including
// undef, i.e. negative entries) are left untouched.
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
                                            int LHSMaxIdx, int RHSMinIdx,
                                            int RHSMaxIdx, int HalfVec) {
  for (int &MaskElt : ShuffV) {
    bool RefersToLHS = MaskElt >= 0 && MaskElt < LHSMaxIdx;
    bool RefersToRHS = MaskElt >= RHSMinIdx && MaskElt < RHSMaxIdx;
    if (RefersToLHS || RefersToRHS)
      MaskElt += HalfVec;
  }
}
// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
// the original is:
// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
// In such a case, just change the shuffle mask to extract the element
// from the permuted index.
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG) {
  SDLoc dl(OrigSToV);
  EVT VT = OrigSToV.getValueType();
  assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
         "Expecting a SCALAR_TO_VECTOR here");
  SDValue Input = OrigSToV.getOperand(0);
  // If the scalar was itself extracted from a vector of the same type, no
  // new node is needed: emit a shuffle of that vector that places the
  // extracted element where SCALAR_TO_VECTOR_PERMUTED would leave it.
  if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
    SDValue OrigVector = Input.getOperand(0);
    // Can't handle non-const element indices or different vector types
    // for the input to the extract and the output of the scalar_to_vector.
    if (Idx && VT == OrigVector.getValueType()) {
      // All other lanes are don't-care (-1). Element NumElts/2 is the least
      // significant element of the most significant doubleword, which is the
      // position SCALAR_TO_VECTOR_PERMUTED semantics specify (see the node's
      // description in PPCISelLowering.h).
      SmallVector<int, 16> NewMask(VT.getVectorNumElements(), -1);
      NewMask[VT.getVectorNumElements() / 2] = Idx->getZExtValue();
      return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
    }
  }
  // Otherwise keep the value in its permuted position rather than forcing it
  // into element zero.
  return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
                     OrigSToV.getOperand(0));
}
// On little endian subtargets, combine shuffles such as:
// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
// into:
// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
// because the latter can be matched to a single instruction merge.
// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
// to put the value into element zero. Adjust the shuffle mask so that the
// vector can remain in permuted form (to prevent a swap prior to a shuffle).
SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
                                                SelectionDAG &DAG) const {
  SDValue LHS = SVN->getOperand(0);
  SDValue RHS = SVN->getOperand(1);
  auto Mask = SVN->getMask();
  int NumElts = LHS.getValueType().getVectorNumElements();
  // Res is returned unchanged (the original shuffle) whenever no combine
  // applies.
  SDValue Res(SVN, 0);
  SDLoc dl(SVN);

  // None of these combines are useful on big endian systems since the ISA
  // already has a big endian bias.
  if (!Subtarget.isLittleEndian())
    return Res;

  // If this is not a shuffle of a shuffle and the first element comes from
  // the second vector, canonicalize to the commuted form. This will make it
  // more likely to match one of the single instruction patterns.
  if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
      RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
    std::swap(LHS, RHS);
    Res = DAG.getCommutedVectorShuffle(*SVN);
    Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
  }

  // Adjust the shuffle mask if either input vector comes from a
  // SCALAR_TO_VECTOR and keep the respective input vector in permuted
  // form (to prevent the need for a swap).
  SmallVector<int, 16> ShuffV(Mask.begin(), Mask.end());
  SDValue SToVLHS = isScalarToVec(LHS);
  SDValue SToVRHS = isScalarToVec(RHS);
  if (SToVLHS || SToVRHS) {
    // NOTE(review): if both inputs are SCALAR_TO_VECTOR, only the LHS element
    // count is used here — presumably both have the same type in practice;
    // confirm this holds for all callers.
    int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
                            : SToVRHS.getValueType().getVectorNumElements();
    int NumEltsOut = ShuffV.size();
    // Initially assume that neither input is permuted. These will be adjusted
    // accordingly if either input is.
    int LHSMaxIdx = -1;
    int RHSMinIdx = -1;
    int RHSMaxIdx = -1;
    int HalfVec = LHS.getValueType().getVectorNumElements() / 2;

    // Get the permuted scalar to vector nodes for the source(s) that come from
    // ISD::SCALAR_TO_VECTOR.
    if (SToVLHS) {
      // Set up the values for the shuffle vector fixup.
      LHSMaxIdx = NumEltsOut / NumEltsIn;
      SToVLHS = getSToVPermuted(SToVLHS, DAG);
      if (SToVLHS.getValueType() != LHS.getValueType())
        SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
      LHS = SToVLHS;
    }
    if (SToVRHS) {
      // RHS lanes are numbered starting at NumEltsOut in the combined mask.
      RHSMinIdx = NumEltsOut;
      RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
      SToVRHS = getSToVPermuted(SToVRHS, DAG);
      if (SToVRHS.getValueType() != RHS.getValueType())
        SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
      RHS = SToVRHS;
    }

    // Fix up the shuffle mask to reflect where the desired element actually is.
    // The minimum and maximum indices that correspond to element zero for both
    // the LHS and RHS are computed and will control which shuffle mask entries
    // are to be changed. For example, if the RHS is permuted, any shuffle mask
    // entries in the range [RHSMinIdx,RHSMaxIdx) will be incremented by
    // HalfVec to refer to the corresponding element in the permuted vector.
    fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
                                    HalfVec);
    Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);

    // We may have simplified away the shuffle. We won't be able to do anything
    // further with it here.
    if (!isa<ShuffleVectorSDNode>(Res))
      return Res;
    Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
  }

  // The common case after we commuted the shuffle is that the RHS is a splat
  // and we have elements coming in from the splat at indices that are not
  // conducive to using a merge.
  // Example:
  // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
  if (!isSplatBV(RHS))
    return Res;

  // We are looking for a mask such that all even elements are from
  // one vector and all odd elements from the other.
  if (!isAlternatingShuffMask(Mask, NumElts))
    return Res;

  // Adjust the mask so we are pulling in the same index from the splat
  // as the index from the interesting vector in consecutive elements.
  // Example:
  // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
  // NOTE(review): this assumes the odd elements are the ones coming from the
  // splat (RHS) — the alternating check alone permits either parity; confirm
  // the earlier canonicalization guarantees the even elements are from LHS.
  for (int i = 1, e = Mask.size(); i < e; i += 2)
    ShuffV[i] = (ShuffV[i - 1] + NumElts);

  Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
  return Res;
}
SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
LSBaseSDNode *LSBase,
DAGCombinerInfo &DCI) const {
@ -14223,7 +14441,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
}
break;
return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
case ISD::STORE: {
EVT Op1VT = N->getOperand(1).getValueType();

View File

@ -221,6 +221,14 @@ namespace llvm {
/// As with SINT_VEC_TO_FP, used for converting illegal types.
UINT_VEC_TO_FP,
/// PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to
/// place the value into the least significant element of the most
/// significant doubleword in the vector. This is not element zero for
/// anything smaller than a doubleword on either endianness. This node has
/// the same semantics as SCALAR_TO_VECTOR except that the value remains in
/// the aforementioned location in the vector register.
SCALAR_TO_VECTOR_PERMUTED,
// FIXME: Remove these once the ANDI glue bug is fixed:
/// i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the
/// eq or gt bit of CR0 after executing andi. x, 1. This is used to
@ -1215,6 +1223,8 @@ namespace llvm {
SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineABS(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineVSelect(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineVectorShuffle(ShuffleVectorSDNode *SVN,
SelectionDAG &DAG) const;
SDValue combineVReverseMemOP(ShuffleVectorSDNode *SVN, LSBaseSDNode *LSBase,
DAGCombinerInfo &DCI) const;

View File

@ -138,6 +138,8 @@ def PPCldvsxlh : SDNode<"PPCISD::LD_VSX_LH", SDT_PPCldvsxlh,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPCldsplat : SDNode<"PPCISD::LD_SPLAT", SDT_PPCldsplat,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPCSToV : SDNode<"PPCISD::SCALAR_TO_VECTOR_PERMUTED",
SDTypeProfile<1, 1, []>, []>;
//-------------------------- Predicate definitions ---------------------------//
def HasVSX : Predicate<"PPCSubTarget->hasVSX()">;
@ -288,6 +290,11 @@ class X_XS6_RA5_RB5<bits<6> opcode, bits<10> xo, string opc,
} // Predicates = HasP9Vector
} // AddedComplexity = 400, hasSideEffects = 0
// Emit a pair of patterns for the same scalar input: NonPermOut selects the
// plain scalar_to_vector form (value forced into element zero), and PermOut
// selects the PPCSToV (SCALAR_TO_VECTOR_PERMUTED) form, where the value may
// remain in its natural (permuted) position in the register.
multiclass ScalToVecWPermute<ValueType Ty, dag In, dag NonPermOut, dag PermOut> {
  def : Pat<(Ty (scalar_to_vector In)), (Ty NonPermOut)>;
  def : Pat<(Ty (PPCSToV In)), (Ty PermOut)>;
}
//-------------------------- Instruction definitions -------------------------//
// VSX instructions require the VSX feature, they are to be selected over
// equivalent Altivec patterns (as they address a larger register set) and
@ -2710,12 +2717,14 @@ def : Pat<(v2i64 (build_vector DblToLong.A, DblToLong.A)),
def : Pat<(v2i64 (build_vector DblToULong.A, DblToULong.A)),
(v2i64 (XXPERMDI (COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC),
(COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC), 0))>;
def : Pat<(v4i32 (scalar_to_vector FltToIntLoad.A)),
(v4i32 (XXSPLTW (COPY_TO_REGCLASS
(XSCVDPSXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>;
def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)),
(v4i32 (XXSPLTW (COPY_TO_REGCLASS
(XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>;
defm : ScalToVecWPermute<
v4i32, FltToIntLoad.A,
(XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1),
(COPY_TO_REGCLASS (XSCVDPSXWSs (XFLOADf32 xoaddr:$A)), VSRC)>;
defm : ScalToVecWPermute<
v4i32, FltToUIntLoad.A,
(XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1),
(COPY_TO_REGCLASS (XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), VSRC)>;
def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)),
(v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>;
def : Pat<(v2f64 (PPCldsplat xoaddr:$A)),
@ -2730,10 +2739,12 @@ def : Pat<(v2i64 (build_vector FltToLong.A, FltToLong.A)),
def : Pat<(v2i64 (build_vector FltToULong.A, FltToULong.A)),
(v2i64 (XXPERMDIs
(COPY_TO_REGCLASS (XSCVDPUXDSs $A), VSFRC), 0))>;
def : Pat<(v2i64 (scalar_to_vector DblToLongLoad.A)),
(v2i64 (XVCVDPSXDS (LXVDSX xoaddr:$A)))>;
def : Pat<(v2i64 (scalar_to_vector DblToULongLoad.A)),
(v2i64 (XVCVDPUXDS (LXVDSX xoaddr:$A)))>;
defm : ScalToVecWPermute<
v2i64, DblToLongLoad.A,
(XVCVDPSXDS (LXVDSX xoaddr:$A)), (XVCVDPSXDS (LXVDSX xoaddr:$A))>;
defm : ScalToVecWPermute<
v2i64, DblToULongLoad.A,
(XVCVDPUXDS (LXVDSX xoaddr:$A)), (XVCVDPUXDS (LXVDSX xoaddr:$A))>;
} // HasVSX
// Any big endian VSX subtarget.
@ -2831,9 +2842,10 @@ def : Pat<WToDPExtractConv.BV13U,
// Any little endian VSX subtarget.
let Predicates = [HasVSX, IsLittleEndian] in {
def : Pat<(v2f64 (scalar_to_vector f64:$A)),
(v2f64 (XXPERMDI (SUBREG_TO_REG (i64 1), $A, sub_64),
(SUBREG_TO_REG (i64 1), $A, sub_64), 0))>;
defm : ScalToVecWPermute<v2f64, (f64 f64:$A),
(XXPERMDI (SUBREG_TO_REG (i64 1), $A, sub_64),
(SUBREG_TO_REG (i64 1), $A, sub_64), 0),
(SUBREG_TO_REG (i64 1), $A, sub_64)>;
def : Pat<(f64 (extractelt v2f64:$S, 0)),
(f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>;
@ -2943,18 +2955,24 @@ def : Pat<(PPCstore_scal_int_from_vsr
(STXSDX (XSCVDPUXDS f64:$src), xoaddr:$dst)>;
// Load-and-splat with fp-to-int conversion (using X-Form VSX/FP loads).
def : Pat<(v4i32 (scalar_to_vector DblToIntLoad.A)),
(v4i32 (XXSPLTW (COPY_TO_REGCLASS
(XSCVDPSXWS (XFLOADf64 xoaddr:$A)), VSRC), 1))>;
def : Pat<(v4i32 (scalar_to_vector DblToUIntLoad.A)),
(v4i32 (XXSPLTW (COPY_TO_REGCLASS
(XSCVDPUXWS (XFLOADf64 xoaddr:$A)), VSRC), 1))>;
def : Pat<(v2i64 (scalar_to_vector FltToLongLoad.A)),
(v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS
(XFLOADf32 xoaddr:$A), VSFRC)), 0))>;
def : Pat<(v2i64 (scalar_to_vector FltToULongLoad.A)),
(v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS
(XFLOADf32 xoaddr:$A), VSFRC)), 0))>;
defm : ScalToVecWPermute<
v4i32, DblToIntLoad.A,
(XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWS (XFLOADf64 xoaddr:$A)), VSRC), 1),
(COPY_TO_REGCLASS (XSCVDPSXWS (XFLOADf64 xoaddr:$A)), VSRC)>;
defm : ScalToVecWPermute<
v4i32, DblToUIntLoad.A,
(XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWS (XFLOADf64 xoaddr:$A)), VSRC), 1),
(COPY_TO_REGCLASS (XSCVDPUXWS (XFLOADf64 xoaddr:$A)), VSRC)>;
defm : ScalToVecWPermute<
v2i64, FltToLongLoad.A,
(XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$A), VSFRC)), 0),
(SUBREG_TO_REG (i64 1), (XSCVDPSXDS (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$A),
VSFRC)), sub_64)>;
defm : ScalToVecWPermute<
v2i64, FltToULongLoad.A,
(XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$A), VSFRC)), 0),
(SUBREG_TO_REG (i64 1), (XSCVDPUXDS (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$A),
VSFRC)), sub_64)>;
} // HasVSX, NoP9Vector
// Any VSX subtarget that only has loads and stores that load in big endian
@ -3156,8 +3174,12 @@ def : Pat<DWToSPExtractConv.El1US1,
(f64 (COPY_TO_REGCLASS $S1, VSRC)), VSFRC)))>;
// v4f32 scalar <-> vector conversions (LE)
def : Pat<(v4f32 (scalar_to_vector f32:$A)),
(v4f32 (XXSLDWI (XSCVDPSPN $A), (XSCVDPSPN $A), 1))>;
// The permuted version is no better than the version that puts the value
// into the right element because XSCVDPSPN is different from all the other
// instructions used for PPCSToV.
defm : ScalToVecWPermute<v4f32, (f32 f32:$A),
(XXSLDWI (XSCVDPSPN $A), (XSCVDPSPN $A), 1),
(XXSLDWI (XSCVDPSPN $A), (XSCVDPSPN $A), 3)>;
def : Pat<(f32 (vector_extract v4f32:$S, 0)),
(f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>;
def : Pat<(f32 (vector_extract v4f32:$S, 1)),
@ -3189,18 +3211,25 @@ def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))),
// LIWAX - This instruction is used for sign extending i32 -> i64.
// LIWZX - This instruction will be emitted for i32, f32, and when
// zero-extending i32 to i64 (zext i32 -> i64).
def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 xoaddr:$src)))),
(v2i64 (XXPERMDIs
(COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSFRC), 2))>;
def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))),
(v2i64 (XXPERMDIs
(COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSFRC), 2))>;
def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))),
(v4i32 (XXPERMDIs
(COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSFRC), 2))>;
def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))),
(v4f32 (XXPERMDIs
(COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSFRC), 2))>;
defm : ScalToVecWPermute<
v2i64, (i64 (sextloadi32 xoaddr:$src)),
(XXPERMDIs (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSFRC), 2),
(SUBREG_TO_REG (i64 1), (LIWAX xoaddr:$src), sub_64)>;
defm : ScalToVecWPermute<
v2i64, (i64 (zextloadi32 xoaddr:$src)),
(XXPERMDIs (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSFRC), 2),
(SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$src), sub_64)>;
defm : ScalToVecWPermute<
v4i32, (i32 (load xoaddr:$src)),
(XXPERMDIs (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSFRC), 2),
(SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$src), sub_64)>;
defm : ScalToVecWPermute<
v4f32, (f32 (load xoaddr:$src)),
(XXPERMDIs (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSFRC), 2),
(SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$src), sub_64)>;
def : Pat<DWToSPExtractConv.BVU,
(v4f32 (VPKUDUM (XXSLDWI (XVCVUXDSP $S2), (XVCVUXDSP $S2), 3),
@ -3336,14 +3365,17 @@ def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)),
// Little endian VSX subtarget with direct moves.
let Predicates = [HasVSX, HasDirectMove, IsLittleEndian] in {
// v16i8 scalar <-> vector conversions (LE)
def : Pat<(v16i8 (scalar_to_vector i32:$A)),
(v16i8 (COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC))>;
def : Pat<(v8i16 (scalar_to_vector i32:$A)),
(v8i16 (COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC))>;
def : Pat<(v4i32 (scalar_to_vector i32:$A)),
(v4i32 MovesToVSR.LE_WORD_0)>;
def : Pat<(v2i64 (scalar_to_vector i64:$A)),
(v2i64 MovesToVSR.LE_DWORD_0)>;
defm : ScalToVecWPermute<v16i8, (i32 i32:$A),
(COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC),
(COPY_TO_REGCLASS MovesToVSR.LE_WORD_1, VSRC)>;
defm : ScalToVecWPermute<v8i16, (i32 i32:$A),
(COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC),
(COPY_TO_REGCLASS MovesToVSR.LE_WORD_1, VSRC)>;
defm : ScalToVecWPermute<v4i32, (i32 i32:$A), MovesToVSR.LE_WORD_0,
(SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64)>;
defm : ScalToVecWPermute<v2i64, (i64 i64:$A), MovesToVSR.LE_DWORD_0,
MovesToVSR.LE_DWORD_1>;
// v2i64 scalar <-> vector conversions (LE)
def : Pat<(i64 (vector_extract v2i64:$S, 0)),
(i64 VectorExtractions.LE_DWORD_0)>;
@ -3641,30 +3673,41 @@ def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
(STXVX $rS, xoaddr:$dst)>;
// Build vectors from i8 loads
def : Pat<(v16i8 (scalar_to_vector ScalarLoads.Li8)),
(v16i8 (VSPLTBs 7, (LXSIBZX xoaddr:$src)))>;
def : Pat<(v8i16 (scalar_to_vector ScalarLoads.ZELi8)),
(v8i16 (VSPLTHs 3, (LXSIBZX xoaddr:$src)))>;
def : Pat<(v4i32 (scalar_to_vector ScalarLoads.ZELi8)),
(v4i32 (XXSPLTWs (LXSIBZX xoaddr:$src), 1))>;
def : Pat<(v2i64 (scalar_to_vector ScalarLoads.ZELi8i64)),
(v2i64 (XXPERMDIs (LXSIBZX xoaddr:$src), 0))>;
def : Pat<(v4i32 (scalar_to_vector ScalarLoads.SELi8)),
(v4i32 (XXSPLTWs (VEXTSB2Ws (LXSIBZX xoaddr:$src)), 1))>;
def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi8i64)),
(v2i64 (XXPERMDIs (VEXTSB2Ds (LXSIBZX xoaddr:$src)), 0))>;
defm : ScalToVecWPermute<v16i8, ScalarLoads.Li8,
(VSPLTBs 7, (LXSIBZX xoaddr:$src)),
(VSPLTBs 7, (LXSIBZX xoaddr:$src))>;
defm : ScalToVecWPermute<v8i16, ScalarLoads.ZELi8,
(VSPLTHs 3, (LXSIBZX xoaddr:$src)),
(VSPLTHs 3, (LXSIBZX xoaddr:$src))>;
defm : ScalToVecWPermute<v4i32, ScalarLoads.ZELi8,
(XXSPLTWs (LXSIBZX xoaddr:$src), 1),
(XXSPLTWs (LXSIBZX xoaddr:$src), 1)>;
defm : ScalToVecWPermute<v2i64, ScalarLoads.ZELi8i64,
(XXPERMDIs (LXSIBZX xoaddr:$src), 0),
(XXPERMDIs (LXSIBZX xoaddr:$src), 0)>;
defm : ScalToVecWPermute<v4i32, ScalarLoads.SELi8,
(XXSPLTWs (VEXTSB2Ws (LXSIBZX xoaddr:$src)), 1),
(XXSPLTWs (VEXTSB2Ws (LXSIBZX xoaddr:$src)), 1)>;
defm : ScalToVecWPermute<v2i64, ScalarLoads.SELi8i64,
(XXPERMDIs (VEXTSB2Ds (LXSIBZX xoaddr:$src)), 0),
(XXPERMDIs (VEXTSB2Ds (LXSIBZX xoaddr:$src)), 0)>;
// Build vectors from i16 loads
def : Pat<(v8i16 (scalar_to_vector ScalarLoads.Li16)),
(v8i16 (VSPLTHs 3, (LXSIHZX xoaddr:$src)))>;
def : Pat<(v4i32 (scalar_to_vector ScalarLoads.ZELi16)),
(v4i32 (XXSPLTWs (LXSIHZX xoaddr:$src), 1))>;
def : Pat<(v2i64 (scalar_to_vector ScalarLoads.ZELi16i64)),
(v2i64 (XXPERMDIs (LXSIHZX xoaddr:$src), 0))>;
def : Pat<(v4i32 (scalar_to_vector ScalarLoads.SELi16)),
(v4i32 (XXSPLTWs (VEXTSH2Ws (LXSIHZX xoaddr:$src)), 1))>;
def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi16i64)),
(v2i64 (XXPERMDIs (VEXTSH2Ds (LXSIHZX xoaddr:$src)), 0))>;
defm : ScalToVecWPermute<v8i16, ScalarLoads.Li16,
(VSPLTHs 3, (LXSIHZX xoaddr:$src)),
(VSPLTHs 3, (LXSIHZX xoaddr:$src))>;
defm : ScalToVecWPermute<v4i32, ScalarLoads.ZELi16,
(XXSPLTWs (LXSIHZX xoaddr:$src), 1),
(XXSPLTWs (LXSIHZX xoaddr:$src), 1)>;
defm : ScalToVecWPermute<v2i64, ScalarLoads.ZELi16i64,
(XXPERMDIs (LXSIHZX xoaddr:$src), 0),
(XXPERMDIs (LXSIHZX xoaddr:$src), 0)>;
defm : ScalToVecWPermute<v4i32, ScalarLoads.SELi16,
(XXSPLTWs (VEXTSH2Ws (LXSIHZX xoaddr:$src)), 1),
(XXSPLTWs (VEXTSH2Ws (LXSIHZX xoaddr:$src)), 1)>;
defm : ScalToVecWPermute<v2i64, ScalarLoads.SELi16i64,
(XXPERMDIs (VEXTSH2Ds (LXSIHZX xoaddr:$src)), 0),
(XXPERMDIs (VEXTSH2Ds (LXSIHZX xoaddr:$src)), 0)>;
// Load/convert and convert/store patterns for f16.
def : Pat<(f64 (extloadf16 xoaddr:$src)),
@ -3806,8 +3849,7 @@ def : Pat<(f32 (PPCxsminc f32:$XA, f32:$XB)),
VSSRC))>;
// Endianness-neutral patterns for const splats with ISA 3.0 instructions.
def : Pat<(v4i32 (scalar_to_vector i32:$A)),
(v4i32 (MTVSRWS $A))>;
defm : ScalToVecWPermute<v4i32, (i32 i32:$A), (MTVSRWS $A), (MTVSRWS $A)>;
def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
(v4i32 (MTVSRWS $A))>;
def : Pat<(v16i8 (build_vector immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
@ -3819,24 +3861,32 @@ def : Pat<(v16i8 (build_vector immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A)),
(v16i8 (COPY_TO_REGCLASS (XXSPLTIB imm:$A), VSRC))>;
def : Pat<(v4i32 (scalar_to_vector FltToIntLoad.A)),
(v4i32 (XVCVSPSXWS (LXVWSX xoaddr:$A)))>;
def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)),
(v4i32 (XVCVSPUXWS (LXVWSX xoaddr:$A)))>;
def : Pat<(v4i32 (scalar_to_vector DblToIntLoadP9.A)),
(v4i32 (XXSPLTW (COPY_TO_REGCLASS
(XSCVDPSXWS (DFLOADf64 iaddrX4:$A)), VSRC), 1))>;
def : Pat<(v4i32 (scalar_to_vector DblToUIntLoadP9.A)),
(v4i32 (XXSPLTW (COPY_TO_REGCLASS
(XSCVDPUXWS (DFLOADf64 iaddrX4:$A)), VSRC), 1))>;
def : Pat<(v2i64 (scalar_to_vector FltToLongLoadP9.A)),
(v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS
(DFLOADf32 iaddrX4:$A),
VSFRC)), 0))>;
def : Pat<(v2i64 (scalar_to_vector FltToULongLoadP9.A)),
(v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS
(DFLOADf32 iaddrX4:$A),
VSFRC)), 0))>;
defm : ScalToVecWPermute<v4i32, FltToIntLoad.A,
(XVCVSPSXWS (LXVWSX xoaddr:$A)),
(XVCVSPSXWS (LXVWSX xoaddr:$A))>;
defm : ScalToVecWPermute<v4i32, FltToUIntLoad.A,
(XVCVSPUXWS (LXVWSX xoaddr:$A)),
(XVCVSPUXWS (LXVWSX xoaddr:$A))>;
defm : ScalToVecWPermute<
v4i32, DblToIntLoadP9.A,
(XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWS (DFLOADf64 iaddrX4:$A)), VSRC), 1),
(SUBREG_TO_REG (i64 1), (XSCVDPSXWS (DFLOADf64 iaddrX4:$A)), sub_64)>;
defm : ScalToVecWPermute<
v4i32, DblToUIntLoadP9.A,
(XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWS (DFLOADf64 iaddrX4:$A)), VSRC), 1),
(SUBREG_TO_REG (i64 1), (XSCVDPUXWS (DFLOADf64 iaddrX4:$A)), sub_64)>;
defm : ScalToVecWPermute<
v2i64, FltToLongLoadP9.A,
(XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$A), VSFRC)), 0),
(SUBREG_TO_REG
(i64 1),
(XSCVDPSXDS (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$A), VSFRC)), sub_64)>;
defm : ScalToVecWPermute<
v2i64, FltToULongLoadP9.A,
(XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$A), VSFRC)), 0),
(SUBREG_TO_REG
(i64 1),
(XSCVDPUXDS (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$A), VSFRC)), sub_64)>;
def : Pat<(v4f32 (PPCldsplat xoaddr:$A)),
(v4f32 (LXVWSX xoaddr:$A))>;
def : Pat<(v4i32 (PPCldsplat xoaddr:$A)),
@ -4116,19 +4166,23 @@ def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst),
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst),
(STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>;
def : Pat<(v2i64 (scalar_to_vector (i64 (load iaddrX4:$src)))),
(v2i64 (XXPERMDIs
(COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSFRC), 2))>;
def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddrX4:$src)))),
(v2i64 (XXPERMDIs
(COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSFRC), 2))>;
defm : ScalToVecWPermute<
v2i64, (i64 (load iaddrX4:$src)),
(XXPERMDIs (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSFRC), 2),
(SUBREG_TO_REG (i64 1), (DFLOADf64 iaddrX4:$src), sub_64)>;
defm : ScalToVecWPermute<
v2i64, (i64 (load xaddrX4:$src)),
(XXPERMDIs (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSFRC), 2),
(SUBREG_TO_REG (i64 1), (XFLOADf64 xaddrX4:$src), sub_64)>;
defm : ScalToVecWPermute<
v2f64, (f64 (load iaddrX4:$src)),
(XXPERMDIs (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSFRC), 2),
(SUBREG_TO_REG (i64 1), (DFLOADf64 iaddrX4:$src), sub_64)>;
defm : ScalToVecWPermute<
v2f64, (f64 (load xaddrX4:$src)),
(XXPERMDIs (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSFRC), 2),
(SUBREG_TO_REG (i64 1), (XFLOADf64 xaddrX4:$src), sub_64)>;
def : Pat<(v2f64 (scalar_to_vector (f64 (load iaddrX4:$src)))),
(v2f64 (XXPERMDIs
(COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSFRC), 2))>;
def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddrX4:$src)))),
(v2f64 (XXPERMDIs
(COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSFRC), 2))>;
def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddrX4:$src),
(XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
sub_64), xaddrX4:$src)>;

View File

@ -13,8 +13,7 @@ define void @testExpandPostRAPseudo(i32* nocapture readonly %ptr) {
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8: lfiwzx f0, 0, r3
; CHECK-P8: ld r4, .LC0@toc@l(r4)
; CHECK-P8: xxswapd vs0, f0
; CHECK-P8: xxspltw v2, vs0, 3
; CHECK-P8: xxspltw v2, vs0, 1
; CHECK-P8: stvx v2, 0, r4
; CHECK-P8: lis r4, 1024
; CHECK-P8: lfiwax f0, 0, r3

View File

@ -1282,8 +1282,7 @@ define <4 x i32> @spltMemVali(i32* nocapture readonly %ptr) {
; P8LE-LABEL: spltMemVali:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: lfiwzx f0, 0, r3
; P8LE-NEXT: xxswapd vs0, f0
; P8LE-NEXT: xxspltw v2, vs0, 3
; P8LE-NEXT: xxspltw v2, vs0, 1
; P8LE-NEXT: blr
entry:
%0 = load i32, i32* %ptr, align 4
@ -2801,8 +2800,7 @@ define <4 x i32> @spltMemValui(i32* nocapture readonly %ptr) {
; P8LE-LABEL: spltMemValui:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: lfiwzx f0, 0, r3
; P8LE-NEXT: xxswapd vs0, f0
; P8LE-NEXT: xxspltw v2, vs0, 3
; P8LE-NEXT: xxspltw v2, vs0, 1
; P8LE-NEXT: blr
entry:
%0 = load i32, i32* %ptr, align 4
@ -4573,7 +4571,7 @@ define <2 x i64> @spltMemValConvftoll(float* nocapture readonly %ptr) {
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfs f0, 0(r3)
; P9LE-NEXT: xscvdpsxds f0, f0
; P9LE-NEXT: xxspltd v2, f0, 0
; P9LE-NEXT: xxspltd v2, vs0, 0
; P9LE-NEXT: blr
;
; P8BE-LABEL: spltMemValConvftoll:
@ -4587,7 +4585,7 @@ define <2 x i64> @spltMemValConvftoll(float* nocapture readonly %ptr) {
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: lfsx f0, 0, r3
; P8LE-NEXT: xscvdpsxds f0, f0
; P8LE-NEXT: xxspltd v2, f0, 0
; P8LE-NEXT: xxspltd v2, vs0, 0
; P8LE-NEXT: blr
entry:
%0 = load float, float* %ptr, align 4
@ -5761,7 +5759,7 @@ define <2 x i64> @spltMemValConvftoull(float* nocapture readonly %ptr) {
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfs f0, 0(r3)
; P9LE-NEXT: xscvdpuxds f0, f0
; P9LE-NEXT: xxspltd v2, f0, 0
; P9LE-NEXT: xxspltd v2, vs0, 0
; P9LE-NEXT: blr
;
; P8BE-LABEL: spltMemValConvftoull:
@ -5775,7 +5773,7 @@ define <2 x i64> @spltMemValConvftoull(float* nocapture readonly %ptr) {
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: lfsx f0, 0, r3
; P8LE-NEXT: xscvdpuxds f0, f0
; P8LE-NEXT: xxspltd v2, f0, 0
; P8LE-NEXT: xxspltd v2, vs0, 0
; P8LE-NEXT: blr
entry:
%0 = load float, float* %ptr, align 4

View File

@ -23,18 +23,12 @@ entry:
define dso_local <16 x i8> @testmrghb2(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
; CHECK-P8-LABEL: testmrghb2:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r3, r2, .LCPI1_0@toc@ha
; CHECK-P8-NEXT: addi r3, r3, .LCPI1_0@toc@l
; CHECK-P8-NEXT: lvx v4, 0, r3
; CHECK-P8-NEXT: vperm v2, v3, v2, v4
; CHECK-P8-NEXT: vmrghb v2, v2, v3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: testmrghb2:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: addis r3, r2, .LCPI1_0@toc@ha
; CHECK-P9-NEXT: addi r3, r3, .LCPI1_0@toc@l
; CHECK-P9-NEXT: lxvx v4, 0, r3
; CHECK-P9-NEXT: vperm v2, v3, v2, v4
; CHECK-P9-NEXT: vmrghb v2, v2, v3
; CHECK-P9-NEXT: blr
entry:
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 24, i32 8, i32 25, i32 9, i32 26, i32 10, i32 27, i32 11, i32 28, i32 12, i32 29, i32 13, i32 30, i32 14, i32 31, i32 15>
@ -57,18 +51,12 @@ entry:
define dso_local <16 x i8> @testmrghh2(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
; CHECK-P8-LABEL: testmrghh2:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r3, r2, .LCPI3_0@toc@ha
; CHECK-P8-NEXT: addi r3, r3, .LCPI3_0@toc@l
; CHECK-P8-NEXT: lvx v4, 0, r3
; CHECK-P8-NEXT: vperm v2, v3, v2, v4
; CHECK-P8-NEXT: vmrghh v2, v2, v3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: testmrghh2:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: addis r3, r2, .LCPI3_0@toc@ha
; CHECK-P9-NEXT: addi r3, r3, .LCPI3_0@toc@l
; CHECK-P9-NEXT: lxvx v4, 0, r3
; CHECK-P9-NEXT: vperm v2, v3, v2, v4
; CHECK-P9-NEXT: vmrghh v2, v2, v3
; CHECK-P9-NEXT: blr
entry:
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 24, i32 25, i32 8, i32 9, i32 26, i32 27, i32 10, i32 11, i32 28, i32 29, i32 12, i32 13, i32 30, i32 31, i32 14, i32 15>
@ -91,18 +79,12 @@ entry:
define dso_local <16 x i8> @testmrglb2(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
; CHECK-P8-LABEL: testmrglb2:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r3, r2, .LCPI5_0@toc@ha
; CHECK-P8-NEXT: addi r3, r3, .LCPI5_0@toc@l
; CHECK-P8-NEXT: lvx v4, 0, r3
; CHECK-P8-NEXT: vperm v2, v3, v2, v4
; CHECK-P8-NEXT: vmrglb v2, v2, v3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: testmrglb2:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: addis r3, r2, .LCPI5_0@toc@ha
; CHECK-P9-NEXT: addi r3, r3, .LCPI5_0@toc@l
; CHECK-P9-NEXT: lxvx v4, 0, r3
; CHECK-P9-NEXT: vperm v2, v3, v2, v4
; CHECK-P9-NEXT: vmrglb v2, v2, v3
; CHECK-P9-NEXT: blr
entry:
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 0, i32 17, i32 1, i32 18, i32 2, i32 19, i32 3, i32 20, i32 4, i32 21, i32 5, i32 22, i32 6, i32 23, i32 7>
@ -125,18 +107,12 @@ entry:
define dso_local <16 x i8> @testmrglh2(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
; CHECK-P8-LABEL: testmrglh2:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r3, r2, .LCPI7_0@toc@ha
; CHECK-P8-NEXT: addi r3, r3, .LCPI7_0@toc@l
; CHECK-P8-NEXT: lvx v4, 0, r3
; CHECK-P8-NEXT: vperm v2, v3, v2, v4
; CHECK-P8-NEXT: vmrglh v2, v2, v3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: testmrglh2:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: addis r3, r2, .LCPI7_0@toc@ha
; CHECK-P9-NEXT: addi r3, r3, .LCPI7_0@toc@l
; CHECK-P9-NEXT: lxvx v4, 0, r3
; CHECK-P9-NEXT: vperm v2, v3, v2, v4
; CHECK-P9-NEXT: vmrglh v2, v2, v3
; CHECK-P9-NEXT: blr
entry:
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 0, i32 1, i32 18, i32 19, i32 2, i32 3, i32 20, i32 21, i32 4, i32 5, i32 22, i32 23, i32 6, i32 7>
@ -159,18 +135,12 @@ entry:
define dso_local <16 x i8> @testmrghw2(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
; CHECK-P8-LABEL: testmrghw2:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r3, r2, .LCPI9_0@toc@ha
; CHECK-P8-NEXT: addi r3, r3, .LCPI9_0@toc@l
; CHECK-P8-NEXT: lvx v4, 0, r3
; CHECK-P8-NEXT: vperm v2, v3, v2, v4
; CHECK-P8-NEXT: vmrghw v2, v2, v3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: testmrghw2:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: addis r3, r2, .LCPI9_0@toc@ha
; CHECK-P9-NEXT: addi r3, r3, .LCPI9_0@toc@l
; CHECK-P9-NEXT: lxvx v4, 0, r3
; CHECK-P9-NEXT: vperm v2, v3, v2, v4
; CHECK-P9-NEXT: vmrghw v2, v2, v3
; CHECK-P9-NEXT: blr
entry:
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 24, i32 25, i32 26, i32 27, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31, i32 12, i32 13, i32 14, i32 15>
@ -193,18 +163,12 @@ entry:
define dso_local <16 x i8> @testmrglw2(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
; CHECK-P8-LABEL: testmrglw2:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r3, r2, .LCPI11_0@toc@ha
; CHECK-P8-NEXT: addi r3, r3, .LCPI11_0@toc@l
; CHECK-P8-NEXT: lvx v4, 0, r3
; CHECK-P8-NEXT: vperm v2, v3, v2, v4
; CHECK-P8-NEXT: vmrglw v2, v2, v3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: testmrglw2:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: addis r3, r2, .LCPI11_0@toc@ha
; CHECK-P9-NEXT: addi r3, r3, .LCPI11_0@toc@l
; CHECK-P9-NEXT: lxvx v4, 0, r3
; CHECK-P9-NEXT: vperm v2, v3, v2, v4
; CHECK-P9-NEXT: vmrglw v2, v2, v3
; CHECK-P9-NEXT: blr
entry:
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 4, i32 5, i32 6, i32 7>
@ -215,24 +179,16 @@ define dso_local <8 x i16> @testmrglb3(<8 x i8>* nocapture readonly %a) local_un
; CHECK-P8-LABEL: testmrglb3:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: ld r3, 0(r3)
; CHECK-P8-NEXT: addis r4, r2, .LCPI12_0@toc@ha
; CHECK-P8-NEXT: xxlxor v4, v4, v4
; CHECK-P8-NEXT: mtfprd f0, r3
; CHECK-P8-NEXT: addi r3, r4, .LCPI12_0@toc@l
; CHECK-P8-NEXT: lvx v3, 0, r3
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: vperm v2, v2, v4, v3
; CHECK-P8-NEXT: xxlxor v2, v2, v2
; CHECK-P8-NEXT: mtvsrd v3, r3
; CHECK-P8-NEXT: vmrghb v2, v2, v3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: testmrglb3:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lfd f0, 0(r3)
; CHECK-P9-NEXT: addis r3, r2, .LCPI12_0@toc@ha
; CHECK-P9-NEXT: addi r3, r3, .LCPI12_0@toc@l
; CHECK-P9-NEXT: lxvx v3, 0, r3
; CHECK-P9-NEXT: xxswapd v2, f0
; CHECK-P9-NEXT: xxlxor v4, v4, v4
; CHECK-P9-NEXT: vperm v2, v2, v4, v3
; CHECK-P9-NEXT: lxsd v2, 0(r3)
; CHECK-P9-NEXT: xxlxor v3, v3, v3
; CHECK-P9-NEXT: vmrghb v2, v3, v2
; CHECK-P9-NEXT: blr
entry:
%0 = load <8 x i8>, <8 x i8>* %a, align 8

View File

@ -331,12 +331,12 @@ define <2 x float> @fptrunc_v2f32_v2f64(<2 x double> %vf1) {
; P9: # %bb.0:
; P9-NEXT: xsrsp f0, v2
; P9-NEXT: xscvdpspn vs0, f0
; P9-NEXT: xxsldwi v3, vs0, vs0, 1
; P9-NEXT: xxsldwi v3, vs0, vs0, 3
; P9-NEXT: xxswapd vs0, v2
; P9-NEXT: xsrsp f0, f0
; P9-NEXT: xscvdpspn vs0, f0
; P9-NEXT: xxsldwi v2, vs0, vs0, 1
; P9-NEXT: vmrglw v2, v3, v2
; P9-NEXT: xxsldwi v2, vs0, vs0, 3
; P9-NEXT: vmrghw v2, v3, v2
; P9-NEXT: blr
%res = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(
<2 x double> %vf1,

View File

@ -40,8 +40,7 @@ define dso_local void @test2(<4 x float>* nocapture %c, float* nocapture readonl
; P8: # %bb.0: # %entry
; P8-NEXT: addi r4, r4, 12
; P8-NEXT: lfiwzx f0, 0, r4
; P8-NEXT: xxswapd vs0, f0
; P8-NEXT: xxspltw v2, vs0, 3
; P8-NEXT: xxspltw v2, vs0, 1
; P8-NEXT: stvx v2, 0, r3
; P8-NEXT: blr
entry:
@ -65,8 +64,7 @@ define dso_local void @test3(<4 x i32>* nocapture %c, i32* nocapture readonly %a
; P8: # %bb.0: # %entry
; P8-NEXT: addi r4, r4, 12
; P8-NEXT: lfiwzx f0, 0, r4
; P8-NEXT: xxswapd vs0, f0
; P8-NEXT: xxspltw v2, vs0, 3
; P8-NEXT: xxspltw v2, vs0, 1
; P8-NEXT: stvx v2, 0, r3
; P8-NEXT: blr
entry:
@ -110,8 +108,7 @@ define <16 x i8> @unadjusted_lxvwsx(i32* %s, i32* %t) {
; P8-LABEL: unadjusted_lxvwsx:
; P8: # %bb.0: # %entry
; P8-NEXT: lfiwzx f0, 0, r3
; P8-NEXT: xxswapd vs0, f0
; P8-NEXT: xxspltw v2, vs0, 3
; P8-NEXT: xxspltw v2, vs0, 1
; P8-NEXT: blr
entry:
%0 = bitcast i32* %s to <4 x i8>*
@ -131,8 +128,7 @@ define <16 x i8> @adjusted_lxvwsx(i64* %s, i64* %t) {
; P8: # %bb.0: # %entry
; P8-NEXT: ld r3, 0(r3)
; P8-NEXT: mtfprd f0, r3
; P8-NEXT: xxswapd v2, vs0
; P8-NEXT: xxspltw v2, v2, 2
; P8-NEXT: xxspltw v2, vs0, 0
; P8-NEXT: blr
entry:
%0 = bitcast i64* %s to <8 x i8>*

View File

@ -9,8 +9,7 @@ define <16 x i8> @test(i32* %s, i32* %t) {
; CHECK-LE-LABEL: test:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: lfiwzx f0, 0, r3
; CHECK-LE-NEXT: xxswapd vs0, f0
; CHECK-LE-NEXT: xxspltw v2, vs0, 3
; CHECK-LE-NEXT: xxspltw v2, vs0, 1
; CHECK-LE-NEXT: blr
; CHECK-LABEL: test:

View File

@ -21,8 +21,8 @@ entry:
; CHECK: sldi r3, r3, 56
; CHECK: mtvsrd v2, r3
; CHECK-LE-LABEL: buildc
; CHECK-LE: mtfprd f0, r3
; CHECK-LE: xxswapd v2, vs0
; CHECK-LE: mtvsrd v2, r3
; CHECK-LE: vspltb v2, v2, 7
}
; Function Attrs: norecurse nounwind readnone
@ -35,8 +35,8 @@ entry:
; CHECK: sldi r3, r3, 48
; CHECK: mtvsrd v2, r3
; CHECK-LE-LABEL: builds
; CHECK-LE: mtfprd f0, r3
; CHECK-LE: xxswapd v2, vs0
; CHECK-LE: mtvsrd v2, r3
; CHECK-LE: vsplth v2, v2, 3
}
; Function Attrs: norecurse nounwind readnone

View File

@ -17,41 +17,33 @@ define <8 x i16> @pr25080(<8 x i32> %a) {
; LE-NEXT: mfvsrwz 3, 34
; LE-NEXT: xxsldwi 1, 34, 34, 1
; LE-NEXT: mfvsrwz 4, 35
; LE-NEXT: xxsldwi 4, 34, 34, 3
; LE-NEXT: mtfprd 2, 3
; LE-NEXT: xxsldwi 2, 34, 34, 3
; LE-NEXT: mtvsrd 36, 3
; LE-NEXT: mffprwz 3, 0
; LE-NEXT: xxswapd 0, 35
; LE-NEXT: mtfprd 3, 4
; LE-NEXT: xxsldwi 5, 35, 35, 1
; LE-NEXT: mtvsrd 37, 4
; LE-NEXT: mffprwz 4, 1
; LE-NEXT: xxsldwi 7, 35, 35, 3
; LE-NEXT: mtfprd 1, 3
; LE-NEXT: xxswapd 33, 3
; LE-NEXT: mffprwz 3, 4
; LE-NEXT: mtfprd 4, 4
; LE-NEXT: xxswapd 34, 1
; LE-NEXT: xxsldwi 1, 35, 35, 1
; LE-NEXT: mtvsrd 34, 3
; LE-NEXT: mffprwz 3, 2
; LE-NEXT: mtvsrd 32, 4
; LE-NEXT: mffprwz 4, 0
; LE-NEXT: mtfprd 0, 3
; LE-NEXT: xxswapd 35, 4
; LE-NEXT: mffprwz 3, 5
; LE-NEXT: mtfprd 6, 4
; LE-NEXT: xxswapd 36, 0
; LE-NEXT: mtfprd 1, 3
; LE-NEXT: mffprwz 3, 7
; LE-NEXT: xxswapd 37, 6
; LE-NEXT: vmrglh 2, 3, 2
; LE-NEXT: xxswapd 35, 2
; LE-NEXT: mtfprd 2, 3
; LE-NEXT: xxswapd 32, 1
; LE-NEXT: xxsldwi 0, 35, 35, 3
; LE-NEXT: mtvsrd 33, 3
; LE-NEXT: mffprwz 3, 1
; LE-NEXT: mtvsrd 38, 4
; LE-NEXT: mtvsrd 35, 3
; LE-NEXT: mffprwz 3, 0
; LE-NEXT: vmrghh 2, 0, 2
; LE-NEXT: mtvsrd 32, 3
; LE-NEXT: addis 3, 2, .LCPI0_1@toc@ha
; LE-NEXT: vmrghh 4, 1, 4
; LE-NEXT: addi 3, 3, .LCPI0_1@toc@l
; LE-NEXT: xxswapd 38, 2
; LE-NEXT: vmrglh 3, 4, 3
; LE-NEXT: vmrglh 4, 0, 5
; LE-NEXT: vmrglh 5, 6, 1
; LE-NEXT: vmrglw 2, 3, 2
; LE-NEXT: vmrglw 3, 5, 4
; LE-NEXT: vmrghh 3, 3, 6
; LE-NEXT: vmrghh 5, 0, 5
; LE-NEXT: vmrglw 2, 4, 2
; LE-NEXT: vspltish 4, 15
; LE-NEXT: vmrglw 3, 5, 3
; LE-NEXT: xxmrgld 34, 35, 34
; LE-NEXT: lvx 3, 0, 3
; LE-NEXT: xxlor 34, 34, 35

View File

@ -58,12 +58,11 @@ L.LB38_2452:
; CHECK-LABEL: @aercalc_
; CHECK: lfs
; CHECK: xxspltd
; CHECK: xxswapd
; CHECK: stxvd2x
; CHECK-NOT: xxswapd
; CHECK-P9-LABEL: @aercalc_
; CHECK-P9: lfs
; CHECK-P9: xxspltd
; CHECK-P9: stxv
; CHECK-P9-NOT: xxswapd

View File

@ -11,9 +11,8 @@ declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #0
define void @draw_llvm_vs_variant0(<4 x float> %x) {
; CHECK-LABEL: draw_llvm_vs_variant0:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lfd f0, 0(r3)
; CHECK-NEXT: xxswapd v3, f0
; CHECK-NEXT: vmrglh v3, v3, v3
; CHECK-NEXT: lxsd v3, 0(r3)
; CHECK-NEXT: vmrghh v3, v3, v3
; CHECK-NEXT: vextsh2w v3, v3
; CHECK-NEXT: xvcvsxwsp vs0, v3
; CHECK-NEXT: xxspltw vs0, vs0, 2

View File

@ -11,34 +11,31 @@
define signext i32 @test_pre_inc_disable_1(i8* nocapture readonly %pix1, i32 signext %i_stride_pix1, i8* nocapture readonly %pix2) {
; CHECK-LABEL: test_pre_inc_disable_1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lfd f0, 0(r5)
; CHECK-NEXT: lxsd v5, 0(r5)
; CHECK-NEXT: addis r5, r2, .LCPI0_0@toc@ha
; CHECK-NEXT: addi r5, r5, .LCPI0_0@toc@l
; CHECK-NEXT: lxvx v2, 0, r5
; CHECK-NEXT: addis r5, r2, .LCPI0_1@toc@ha
; CHECK-NEXT: addi r5, r5, .LCPI0_1@toc@l
; CHECK-NEXT: lxvx v4, 0, r5
; CHECK-NEXT: xxswapd v5, f0
; CHECK-NEXT: xxlxor v3, v3, v3
; CHECK-NEXT: li r5, 4
; CHECK-NEXT: xxlxor v3, v3, v3
; CHECK-NEXT: vperm v0, v3, v5, v2
; CHECK-NEXT: mtctr r5
; CHECK-NEXT: li r5, 0
; CHECK-NEXT: vperm v1, v5, v3, v4
; CHECK-NEXT: vperm v1, v3, v5, v4
; CHECK-NEXT: li r6, 0
; CHECK-NEXT: xvnegsp v5, v0
; CHECK-NEXT: xvnegsp v0, v1
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_1: # %for.cond1.preheader
; CHECK-NEXT: #
; CHECK-NEXT: lfd f0, 0(r3)
; CHECK-NEXT: xxswapd v1, f0
; CHECK-NEXT: lfdx f0, r3, r4
; CHECK-NEXT: vperm v6, v1, v3, v4
; CHECK-NEXT: lxsd v1, 0(r3)
; CHECK-NEXT: vperm v6, v3, v1, v4
; CHECK-NEXT: vperm v1, v3, v1, v2
; CHECK-NEXT: xvnegsp v1, v1
; CHECK-NEXT: add r7, r3, r4
; CHECK-NEXT: xvnegsp v6, v6
; CHECK-NEXT: add r7, r3, r4
; CHECK-NEXT: vabsduw v1, v1, v5
; CHECK-NEXT: vabsduw v6, v6, v0
; CHECK-NEXT: vadduwm v1, v6, v1
@ -46,15 +43,14 @@ define signext i32 @test_pre_inc_disable_1(i8* nocapture readonly %pix1, i32 sig
; CHECK-NEXT: vadduwm v1, v1, v6
; CHECK-NEXT: xxspltw v6, v1, 2
; CHECK-NEXT: vadduwm v1, v1, v6
; CHECK-NEXT: xxswapd v6, f0
; CHECK-NEXT: lxsdx v6, r3, r4
; CHECK-NEXT: vextuwrx r3, r5, v1
; CHECK-NEXT: vperm v7, v6, v3, v4
; CHECK-NEXT: vperm v7, v3, v6, v4
; CHECK-NEXT: vperm v6, v3, v6, v2
; CHECK-NEXT: add r6, r3, r6
; CHECK-NEXT: add r3, r7, r4
; CHECK-NEXT: xvnegsp v6, v6
; CHECK-NEXT: xvnegsp v1, v7
; CHECK-NEXT: vabsduw v6, v6, v5
; CHECK-NEXT: add r6, r3, r6
; CHECK-NEXT: vabsduw v1, v1, v0
; CHECK-NEXT: vadduwm v1, v1, v6
; CHECK-NEXT: xxswapd v6, v1
@ -62,6 +58,7 @@ define signext i32 @test_pre_inc_disable_1(i8* nocapture readonly %pix1, i32 sig
; CHECK-NEXT: xxspltw v6, v1, 2
; CHECK-NEXT: vadduwm v1, v1, v6
; CHECK-NEXT: vextuwrx r8, r5, v1
; CHECK-NEXT: add r3, r7, r4
; CHECK-NEXT: add r6, r8, r6
; CHECK-NEXT: bdnz .LBB0_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
@ -181,29 +178,27 @@ for.cond.cleanup: ; preds = %for.cond1.preheader
define signext i32 @test_pre_inc_disable_2(i8* nocapture readonly %pix1, i8* nocapture readonly %pix2) {
; CHECK-LABEL: test_pre_inc_disable_2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lfd f0, 0(r3)
; CHECK-NEXT: lxsd v2, 0(r3)
; CHECK-NEXT: addis r3, r2, .LCPI1_0@toc@ha
; CHECK-NEXT: addi r3, r3, .LCPI1_0@toc@l
; CHECK-NEXT: lxvx v4, 0, r3
; CHECK-NEXT: addis r3, r2, .LCPI1_1@toc@ha
; CHECK-NEXT: xxswapd v2, f0
; CHECK-NEXT: lfd f0, 0(r4)
; CHECK-NEXT: addi r3, r3, .LCPI1_1@toc@l
; CHECK-NEXT: xxlxor v3, v3, v3
; CHECK-NEXT: lxvx v0, 0, r3
; CHECK-NEXT: xxswapd v1, f0
; CHECK-NEXT: vperm v5, v2, v3, v4
; CHECK-NEXT: lxsd v1, 0(r4)
; CHECK-NEXT: xxlxor v3, v3, v3
; CHECK-NEXT: vperm v5, v3, v2, v4
; CHECK-NEXT: vperm v2, v3, v2, v0
; CHECK-NEXT: vperm v0, v3, v1, v0
; CHECK-NEXT: vperm v3, v1, v3, v4
; CHECK-NEXT: vperm v3, v3, v1, v4
; CHECK-NEXT: vabsduw v2, v2, v0
; CHECK-NEXT: vabsduw v3, v5, v3
; CHECK-NEXT: vadduwm v2, v3, v2
; CHECK-NEXT: xxswapd v3, v2
; CHECK-NEXT: li r3, 0
; CHECK-NEXT: vadduwm v2, v2, v3
; CHECK-NEXT: xxspltw v3, v2, 2
; CHECK-NEXT: vadduwm v2, v2, v3
; CHECK-NEXT: li r3, 0
; CHECK-NEXT: vextuwrx r3, r3, v2
; CHECK-NEXT: extsw r3, r3
; CHECK-NEXT: blr
@ -286,16 +281,14 @@ define void @test32(i8* nocapture readonly %pix2, i32 signext %i_pix2) {
; CHECK-LABEL: test32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: add r5, r3, r4
; CHECK-NEXT: lfiwzx f0, r3, r4
; CHECK-NEXT: lxsiwzx v2, r3, r4
; CHECK-NEXT: addis r3, r2, .LCPI2_0@toc@ha
; CHECK-NEXT: addi r3, r3, .LCPI2_0@toc@l
; CHECK-NEXT: lxvx v4, 0, r3
; CHECK-NEXT: li r3, 4
; CHECK-NEXT: xxswapd v2, f0
; CHECK-NEXT: lfiwzx f0, r5, r3
; CHECK-NEXT: lxsiwzx v5, r5, r3
; CHECK-NEXT: xxlxor v3, v3, v3
; CHECK-NEXT: vperm v2, v2, v3, v4
; CHECK-NEXT: xxswapd v5, f0
; CHECK-NEXT: vperm v3, v5, v3, v4
; CHECK-NEXT: vspltisw v4, 8
; CHECK-NEXT: vnegw v3, v3
@ -361,16 +354,15 @@ define void @test16(i16* nocapture readonly %sums, i32 signext %delta, i32 signe
; CHECK-NEXT: lxsihzx v2, r6, r7
; CHECK-NEXT: lxsihzx v4, r3, r4
; CHECK-NEXT: li r6, 0
; CHECK-NEXT: mtfprd f0, r6
; CHECK-NEXT: mtvsrd v3, r6
; CHECK-NEXT: vsplth v4, v4, 3
; CHECK-NEXT: xxswapd v3, vs0
; CHECK-NEXT: vsplth v2, v2, 3
; CHECK-NEXT: addis r3, r2, .LCPI3_0@toc@ha
; CHECK-NEXT: addi r3, r3, .LCPI3_0@toc@l
; CHECK-NEXT: vmrglh v2, v3, v2
; CHECK-NEXT: vmrglh v3, v3, v4
; CHECK-NEXT: xxlxor v4, v4, v4
; CHECK-NEXT: vmrglw v3, v3, v4
; CHECK-NEXT: vmrghh v4, v3, v4
; CHECK-NEXT: vmrghh v2, v3, v2
; CHECK-NEXT: vsplth v3, v3, 3
; CHECK-NEXT: vmrglw v3, v4, v3
; CHECK-NEXT: lxvx v4, 0, r3
; CHECK-NEXT: li r3, 0
; CHECK-NEXT: vperm v2, v2, v3, v4
@ -446,18 +438,17 @@ define void @test8(i8* nocapture readonly %sums, i32 signext %delta, i32 signext
; CHECK-NEXT: add r6, r3, r4
; CHECK-NEXT: lxsibzx v2, r3, r4
; CHECK-NEXT: li r3, 0
; CHECK-NEXT: mtfprd f0, r3
; CHECK-NEXT: mtvsrd v3, r3
; CHECK-NEXT: li r3, 8
; CHECK-NEXT: lxsibzx v5, r6, r3
; CHECK-NEXT: xxswapd v3, vs0
; CHECK-NEXT: vspltb v4, v3, 15
; CHECK-NEXT: vspltb v2, v2, 7
; CHECK-NEXT: vmrglb v2, v3, v2
; CHECK-NEXT: addis r3, r2, .LCPI4_0@toc@ha
; CHECK-NEXT: addi r3, r3, .LCPI4_0@toc@l
; CHECK-NEXT: vspltb v2, v2, 7
; CHECK-NEXT: vmrghb v2, v3, v2
; CHECK-NEXT: vspltb v4, v3, 7
; CHECK-NEXT: vspltb v5, v5, 7
; CHECK-NEXT: vmrglh v2, v2, v4
; CHECK-NEXT: vmrglb v3, v3, v5
; CHECK-NEXT: vmrghb v3, v3, v5
; CHECK-NEXT: vmrglw v2, v2, v4
; CHECK-NEXT: vmrglh v3, v3, v4
; CHECK-NEXT: vmrglw v3, v4, v3

View File

@ -53,8 +53,7 @@ define <4 x float> @foof(float* nocapture readonly %a) #0 {
; CHECK-LABEL: foof:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lfiwzx f0, 0, r3
; CHECK-NEXT: xxswapd vs0, f0
; CHECK-NEXT: xxspltw v2, vs0, 3
; CHECK-NEXT: xxspltw v2, vs0, 1
; CHECK-NEXT: blr
entry:
%0 = load float, float* %a, align 4
@ -68,8 +67,7 @@ define <4 x float> @foofx(float* nocapture readonly %a, i64 %idx) #0 {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sldi r4, r4, 2
; CHECK-NEXT: lfiwzx f0, r3, r4
; CHECK-NEXT: xxswapd vs0, f0
; CHECK-NEXT: xxspltw v2, vs0, 3
; CHECK-NEXT: xxspltw v2, vs0, 1
; CHECK-NEXT: blr
entry:
%p = getelementptr float, float* %a, i64 %idx

View File

@ -13,8 +13,7 @@ define <2 x i64> @s2v_test1(i64* nocapture readonly %int64, <2 x i64> %vec) {
; P9LE-LABEL: s2v_test1:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfd f0, 0(r3)
; P9LE-NEXT: xxswapd v3, f0
; P9LE-NEXT: xxpermdi v2, v2, v3, 1
; P9LE-NEXT: xxmrghd v2, v2, vs0
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test1:
@ -33,8 +32,7 @@ define <2 x i64> @s2v_test2(i64* nocapture readonly %int64, <2 x i64> %vec) {
; P9LE-LABEL: s2v_test2:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfd f0, 8(r3)
; P9LE-NEXT: xxswapd v3, f0
; P9LE-NEXT: xxpermdi v2, v2, v3, 1
; P9LE-NEXT: xxmrghd v2, v2, vs0
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test2:
@ -55,8 +53,7 @@ define <2 x i64> @s2v_test3(i64* nocapture readonly %int64, <2 x i64> %vec, i32
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: sldi r4, r7, 3
; P9LE-NEXT: lfdx f0, r3, r4
; P9LE-NEXT: xxswapd v3, f0
; P9LE-NEXT: xxpermdi v2, v2, v3, 1
; P9LE-NEXT: xxmrghd v2, v2, vs0
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test3
@ -78,8 +75,7 @@ define <2 x i64> @s2v_test4(i64* nocapture readonly %int64, <2 x i64> %vec) {
; P9LE-LABEL: s2v_test4:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfd f0, 8(r3)
; P9LE-NEXT: xxswapd v3, f0
; P9LE-NEXT: xxpermdi v2, v2, v3, 1
; P9LE-NEXT: xxmrghd v2, v2, vs0
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test4:
@ -99,8 +95,7 @@ define <2 x i64> @s2v_test5(<2 x i64> %vec, i64* nocapture readonly %ptr1) {
; P9LE-LABEL: s2v_test5:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfd f0, 0(r5)
; P9LE-NEXT: xxswapd v3, f0
; P9LE-NEXT: xxpermdi v2, v2, v3, 1
; P9LE-NEXT: xxmrghd v2, v2, vs0
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test5:
@ -119,8 +114,7 @@ define <2 x double> @s2v_test_f1(double* nocapture readonly %f64, <2 x double> %
; P9LE-LABEL: s2v_test_f1:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfd f0, 0(r3)
; P9LE-NEXT: xxswapd vs0, f0
; P9LE-NEXT: xxpermdi v2, v2, vs0, 1
; P9LE-NEXT: xxmrghd v2, v2, vs0
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test_f1:
@ -132,8 +126,7 @@ define <2 x double> @s2v_test_f1(double* nocapture readonly %f64, <2 x double> %
; P8LE-LABEL: s2v_test_f1:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: lfdx f0, 0, r3
; P8LE-NEXT: xxspltd vs0, vs0, 0
; P8LE-NEXT: xxpermdi v2, v2, vs0, 1
; P8LE-NEXT: xxmrghd v2, v2, vs0
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test_f1:
@ -152,8 +145,7 @@ define <2 x double> @s2v_test_f2(double* nocapture readonly %f64, <2 x double> %
; P9LE-LABEL: s2v_test_f2:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfd f0, 8(r3)
; P9LE-NEXT: xxswapd vs0, f0
; P9LE-NEXT: xxpermdi v2, v2, vs0, 1
; P9LE-NEXT: xxmrghd v2, v2, vs0
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test_f2:
@ -165,8 +157,7 @@ define <2 x double> @s2v_test_f2(double* nocapture readonly %f64, <2 x double> %
; P8LE-LABEL: s2v_test_f2:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: lfd f0, 8(r3)
; P8LE-NEXT: xxspltd vs0, vs0, 0
; P8LE-NEXT: xxpermdi v2, v2, vs0, 1
; P8LE-NEXT: xxmrghd v2, v2, vs0
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test_f2:
@ -187,8 +178,7 @@ define <2 x double> @s2v_test_f3(double* nocapture readonly %f64, <2 x double> %
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: sldi r4, r7, 3
; P9LE-NEXT: lfdx f0, r3, r4
; P9LE-NEXT: xxswapd vs0, f0
; P9LE-NEXT: xxpermdi v2, v2, vs0, 1
; P9LE-NEXT: xxmrghd v2, v2, vs0
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test_f3:
@ -202,8 +192,7 @@ define <2 x double> @s2v_test_f3(double* nocapture readonly %f64, <2 x double> %
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: sldi r4, r7, 3
; P8LE-NEXT: lfdx f0, r3, r4
; P8LE-NEXT: xxspltd vs0, vs0, 0
; P8LE-NEXT: xxpermdi v2, v2, vs0, 1
; P8LE-NEXT: xxmrghd v2, v2, vs0
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test_f3:
@ -225,8 +214,7 @@ define <2 x double> @s2v_test_f4(double* nocapture readonly %f64, <2 x double> %
; P9LE-LABEL: s2v_test_f4:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfd f0, 8(r3)
; P9LE-NEXT: xxswapd vs0, f0
; P9LE-NEXT: xxpermdi v2, v2, vs0, 1
; P9LE-NEXT: xxmrghd v2, v2, vs0
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test_f4:
@ -238,8 +226,7 @@ define <2 x double> @s2v_test_f4(double* nocapture readonly %f64, <2 x double> %
; P8LE-LABEL: s2v_test_f4:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: lfd f0, 8(r3)
; P8LE-NEXT: xxspltd vs0, vs0, 0
; P8LE-NEXT: xxpermdi v2, v2, vs0, 1
; P8LE-NEXT: xxmrghd v2, v2, vs0
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test_f4:
@ -259,8 +246,7 @@ define <2 x double> @s2v_test_f5(<2 x double> %vec, double* nocapture readonly %
; P9LE-LABEL: s2v_test_f5:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfd f0, 0(r5)
; P9LE-NEXT: xxswapd vs0, f0
; P9LE-NEXT: xxpermdi v2, v2, vs0, 1
; P9LE-NEXT: xxmrghd v2, v2, vs0
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test_f5:
@ -272,8 +258,7 @@ define <2 x double> @s2v_test_f5(<2 x double> %vec, double* nocapture readonly %
; P8LE-LABEL: s2v_test_f5:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: lfdx f0, 0, r5
; P8LE-NEXT: xxspltd vs0, vs0, 0
; P8LE-NEXT: xxpermdi v2, v2, vs0, 1
; P8LE-NEXT: xxmrghd v2, v2, vs0
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test_f5:

View File

@ -12,8 +12,7 @@ define <2 x i64> @s2v_test1(i32* nocapture readonly %int32, <2 x i64> %vec) {
; P9LE-LABEL: s2v_test1:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfiwax f0, 0, r3
; P9LE-NEXT: xxswapd v3, f0
; P9LE-NEXT: xxpermdi v2, v2, v3, 1
; P9LE-NEXT: xxmrghd v2, v2, vs0
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test1:
@ -25,8 +24,7 @@ define <2 x i64> @s2v_test1(i32* nocapture readonly %int32, <2 x i64> %vec) {
; P8LE-LABEL: s2v_test1:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: lfiwax f0, 0, r3
; P8LE-NEXT: xxswapd v3, f0
; P8LE-NEXT: xxpermdi v2, v2, v3, 1
; P8LE-NEXT: xxmrghd v2, v2, vs0
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test1:
@ -47,8 +45,7 @@ define <2 x i64> @s2v_test2(i32* nocapture readonly %int32, <2 x i64> %vec) {
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: addi r3, r3, 4
; P9LE-NEXT: lfiwax f0, 0, r3
; P9LE-NEXT: xxswapd v3, f0
; P9LE-NEXT: xxpermdi v2, v2, v3, 1
; P9LE-NEXT: xxmrghd v2, v2, vs0
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test2:
@ -62,8 +59,7 @@ define <2 x i64> @s2v_test2(i32* nocapture readonly %int32, <2 x i64> %vec) {
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addi r3, r3, 4
; P8LE-NEXT: lfiwax f0, 0, r3
; P8LE-NEXT: xxswapd v3, f0
; P8LE-NEXT: xxpermdi v2, v2, v3, 1
; P8LE-NEXT: xxmrghd v2, v2, vs0
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test2:
@ -86,8 +82,7 @@ define <2 x i64> @s2v_test3(i32* nocapture readonly %int32, <2 x i64> %vec, i32
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: sldi r4, r7, 2
; P9LE-NEXT: lfiwax f0, r3, r4
; P9LE-NEXT: xxswapd v3, f0
; P9LE-NEXT: xxpermdi v2, v2, v3, 1
; P9LE-NEXT: xxmrghd v2, v2, vs0
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test3:
@ -101,8 +96,7 @@ define <2 x i64> @s2v_test3(i32* nocapture readonly %int32, <2 x i64> %vec, i32
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: sldi r4, r7, 2
; P8LE-NEXT: lfiwax f0, r3, r4
; P8LE-NEXT: xxswapd v3, f0
; P8LE-NEXT: xxpermdi v2, v2, v3, 1
; P8LE-NEXT: xxmrghd v2, v2, vs0
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test3:
@ -126,8 +120,7 @@ define <2 x i64> @s2v_test4(i32* nocapture readonly %int32, <2 x i64> %vec) {
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: addi r3, r3, 4
; P9LE-NEXT: lfiwax f0, 0, r3
; P9LE-NEXT: xxswapd v3, f0
; P9LE-NEXT: xxpermdi v2, v2, v3, 1
; P9LE-NEXT: xxmrghd v2, v2, vs0
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test4:
@ -141,8 +134,7 @@ define <2 x i64> @s2v_test4(i32* nocapture readonly %int32, <2 x i64> %vec) {
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addi r3, r3, 4
; P8LE-NEXT: lfiwax f0, 0, r3
; P8LE-NEXT: xxswapd v3, f0
; P8LE-NEXT: xxpermdi v2, v2, v3, 1
; P8LE-NEXT: xxmrghd v2, v2, vs0
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test4:
@ -164,8 +156,7 @@ define <2 x i64> @s2v_test5(<2 x i64> %vec, i32* nocapture readonly %ptr1) {
; P9LE-LABEL: s2v_test5:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfiwax f0, 0, r5
; P9LE-NEXT: xxswapd v3, f0
; P9LE-NEXT: xxpermdi v2, v2, v3, 1
; P9LE-NEXT: xxmrghd v2, v2, vs0
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test5:
@ -177,8 +168,7 @@ define <2 x i64> @s2v_test5(<2 x i64> %vec, i32* nocapture readonly %ptr1) {
; P8LE-LABEL: s2v_test5:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: lfiwax f0, 0, r5
; P8LE-NEXT: xxswapd v3, f0
; P8LE-NEXT: xxpermdi v2, v2, v3, 1
; P8LE-NEXT: xxmrghd v2, v2, vs0
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test5:
@ -198,8 +188,7 @@ define <2 x i64> @s2v_test6(i32* nocapture readonly %ptr) {
; P9LE-LABEL: s2v_test6:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfiwax f0, 0, r3
; P9LE-NEXT: xxswapd v2, f0
; P9LE-NEXT: xxspltd v2, v2, 1
; P9LE-NEXT: xxspltd v2, vs0, 0
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test6:
@ -211,8 +200,7 @@ define <2 x i64> @s2v_test6(i32* nocapture readonly %ptr) {
; P8LE-LABEL: s2v_test6:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: lfiwax f0, 0, r3
; P8LE-NEXT: xxswapd v2, f0
; P8LE-NEXT: xxspltd v2, v2, 1
; P8LE-NEXT: xxspltd v2, vs0, 0
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test6:
@ -233,8 +221,7 @@ define <2 x i64> @s2v_test7(i32* nocapture readonly %ptr) {
; P9LE-LABEL: s2v_test7:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfiwax f0, 0, r3
; P9LE-NEXT: xxswapd v2, f0
; P9LE-NEXT: xxspltd v2, v2, 1
; P9LE-NEXT: xxspltd v2, vs0, 0
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test7:
@ -246,8 +233,7 @@ define <2 x i64> @s2v_test7(i32* nocapture readonly %ptr) {
; P8LE-LABEL: s2v_test7:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: lfiwax f0, 0, r3
; P8LE-NEXT: xxswapd v2, f0
; P8LE-NEXT: xxspltd v2, v2, 1
; P8LE-NEXT: xxspltd v2, vs0, 0
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test7:

View File

@ -11,12 +11,11 @@
define <4 x i32> @s2v_test1(i32* nocapture readonly %int32, <4 x i32> %vec) {
; P8LE-LABEL: s2v_test1:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: lfiwzx f0, 0, r3
; P8LE-NEXT: addis r4, r2, .LCPI0_0@toc@ha
; P8LE-NEXT: addi r3, r4, .LCPI0_0@toc@l
; P8LE-NEXT: lvx v3, 0, r3
; P8LE-NEXT: xxswapd v4, f0
; P8LE-NEXT: vperm v2, v4, v2, v3
; P8LE-NEXT: lxsiwzx v4, 0, r3
; P8LE-NEXT: addi r4, r4, .LCPI0_0@toc@l
; P8LE-NEXT: lvx v3, 0, r4
; P8LE-NEXT: vperm v2, v2, v4, v3
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test1:
@ -36,13 +35,12 @@ entry:
define <4 x i32> @s2v_test2(i32* nocapture readonly %int32, <4 x i32> %vec) {
; P8LE-LABEL: s2v_test2:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addi r3, r3, 4
; P8LE-NEXT: addis r4, r2, .LCPI1_0@toc@ha
; P8LE-NEXT: lfiwzx f0, 0, r3
; P8LE-NEXT: addi r3, r4, .LCPI1_0@toc@l
; P8LE-NEXT: lvx v3, 0, r3
; P8LE-NEXT: xxswapd v4, f0
; P8LE-NEXT: vperm v2, v4, v2, v3
; P8LE-NEXT: addi r3, r3, 4
; P8LE-NEXT: addi r4, r4, .LCPI1_0@toc@l
; P8LE-NEXT: lxsiwzx v4, 0, r3
; P8LE-NEXT: lvx v3, 0, r4
; P8LE-NEXT: vperm v2, v2, v4, v3
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test2:
@ -64,13 +62,12 @@ entry:
define <4 x i32> @s2v_test3(i32* nocapture readonly %int32, <4 x i32> %vec, i32 signext %Idx) {
; P8LE-LABEL: s2v_test3:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: sldi r5, r7, 2
; P8LE-NEXT: addis r4, r2, .LCPI2_0@toc@ha
; P8LE-NEXT: lfiwzx f0, r3, r5
; P8LE-NEXT: addi r3, r4, .LCPI2_0@toc@l
; P8LE-NEXT: lvx v4, 0, r3
; P8LE-NEXT: xxswapd v3, f0
; P8LE-NEXT: vperm v2, v3, v2, v4
; P8LE-NEXT: sldi r5, r7, 2
; P8LE-NEXT: addi r4, r4, .LCPI2_0@toc@l
; P8LE-NEXT: lxsiwzx v3, r3, r5
; P8LE-NEXT: lvx v4, 0, r4
; P8LE-NEXT: vperm v2, v2, v3, v4
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test3:
@ -93,13 +90,12 @@ entry:
define <4 x i32> @s2v_test4(i32* nocapture readonly %int32, <4 x i32> %vec) {
; P8LE-LABEL: s2v_test4:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addi r3, r3, 4
; P8LE-NEXT: addis r4, r2, .LCPI3_0@toc@ha
; P8LE-NEXT: lfiwzx f0, 0, r3
; P8LE-NEXT: addi r3, r4, .LCPI3_0@toc@l
; P8LE-NEXT: lvx v3, 0, r3
; P8LE-NEXT: xxswapd v4, f0
; P8LE-NEXT: vperm v2, v4, v2, v3
; P8LE-NEXT: addi r3, r3, 4
; P8LE-NEXT: addi r4, r4, .LCPI3_0@toc@l
; P8LE-NEXT: lxsiwzx v4, 0, r3
; P8LE-NEXT: lvx v3, 0, r4
; P8LE-NEXT: vperm v2, v2, v4, v3
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test4:
@ -121,12 +117,11 @@ entry:
define <4 x i32> @s2v_test5(<4 x i32> %vec, i32* nocapture readonly %ptr1) {
; P8LE-LABEL: s2v_test5:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: lfiwzx f0, 0, r5
; P8LE-NEXT: addis r3, r2, .LCPI4_0@toc@ha
; P8LE-NEXT: lxsiwzx v4, 0, r5
; P8LE-NEXT: addi r3, r3, .LCPI4_0@toc@l
; P8LE-NEXT: lvx v3, 0, r3
; P8LE-NEXT: xxswapd v4, f0
; P8LE-NEXT: vperm v2, v4, v2, v3
; P8LE-NEXT: vperm v2, v2, v4, v3
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test5:
@ -146,12 +141,11 @@ entry:
define <4 x float> @s2v_test_f1(float* nocapture readonly %f64, <4 x float> %vec) {
; P8LE-LABEL: s2v_test_f1:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: lfiwzx f0, 0, r3
; P8LE-NEXT: addis r4, r2, .LCPI5_0@toc@ha
; P8LE-NEXT: addi r3, r4, .LCPI5_0@toc@l
; P8LE-NEXT: lvx v3, 0, r3
; P8LE-NEXT: xxswapd v4, f0
; P8LE-NEXT: vperm v2, v4, v2, v3
; P8LE-NEXT: lxsiwzx v4, 0, r3
; P8LE-NEXT: addi r4, r4, .LCPI5_0@toc@l
; P8LE-NEXT: lvx v3, 0, r4
; P8LE-NEXT: vperm v2, v2, v4, v3
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test_f1:
@ -172,10 +166,9 @@ define <2 x float> @s2v_test_f2(float* nocapture readonly %f64, <2 x float> %vec
; P9LE-LABEL: s2v_test_f2:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: addi r3, r3, 4
; P9LE-DAG: xxspltw v2, v2, 2
; P9LE-DAG: lfiwzx f0, 0, r3
; P9LE-NEXT: xxswapd v3, f0
; P9LE-NEXT: vmrglw v2, v2, v3
; P9LE-NEXT: lxsiwzx v3, 0, r3
; P9LE-NEXT: vmrglw v2, v2, v2
; P9LE-NEXT: vmrghw v2, v2, v3
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test_f2:
@ -189,11 +182,10 @@ define <2 x float> @s2v_test_f2(float* nocapture readonly %f64, <2 x float> %vec
; P8LE-LABEL: s2v_test_f2:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: vmrglw v2, v2, v2
; P8LE-NEXT: addi r3, r3, 4
; P8LE-NEXT: xxspltw v2, v2, 2
; P8LE-NEXT: lfiwzx f0, 0, r3
; P8LE-NEXT: xxswapd v3, f0
; P8LE-NEXT: vmrglw v2, v2, v3
; P8LE-NEXT: lxsiwzx v3, 0, r3
; P8LE-NEXT: vmrghw v2, v2, v3
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test_f2:
@ -216,10 +208,9 @@ define <2 x float> @s2v_test_f3(float* nocapture readonly %f64, <2 x float> %vec
; P9LE-LABEL: s2v_test_f3:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: sldi r4, r7, 2
; P9LE-NEXT: lfiwzx f0, r3, r4
; P9LE-DAG: xxspltw v2, v2, 2
; P9LE-DAG: xxswapd v3, f0
; P9LE-NEXT: vmrglw v2, v2, v3
; P9LE-NEXT: lxsiwzx v3, r3, r4
; P9LE-NEXT: vmrglw v2, v2, v2
; P9LE-NEXT: vmrghw v2, v2, v3
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test_f3:
@ -233,11 +224,10 @@ define <2 x float> @s2v_test_f3(float* nocapture readonly %f64, <2 x float> %vec
; P8LE-LABEL: s2v_test_f3:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: vmrglw v2, v2, v2
; P8LE-NEXT: sldi r4, r7, 2
; P8LE-NEXT: xxspltw v2, v2, 2
; P8LE-NEXT: lfiwzx f0, r3, r4
; P8LE-NEXT: xxswapd v3, f0
; P8LE-NEXT: vmrglw v2, v2, v3
; P8LE-NEXT: lxsiwzx v3, r3, r4
; P8LE-NEXT: vmrghw v2, v2, v3
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test_f3:
@ -261,10 +251,9 @@ define <2 x float> @s2v_test_f4(float* nocapture readonly %f64, <2 x float> %vec
; P9LE-LABEL: s2v_test_f4:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: addi r3, r3, 4
; P9LE-NEXT: lfiwzx f0, 0, r3
; P9LE-DAG: xxspltw v2, v2, 2
; P9LE-DAG: xxswapd v3, f0
; P9LE-NEXT: vmrglw v2, v2, v3
; P9LE-NEXT: lxsiwzx v3, 0, r3
; P9LE-NEXT: vmrglw v2, v2, v2
; P9LE-NEXT: vmrghw v2, v2, v3
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test_f4:
@ -278,11 +267,10 @@ define <2 x float> @s2v_test_f4(float* nocapture readonly %f64, <2 x float> %vec
; P8LE-LABEL: s2v_test_f4:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: vmrglw v2, v2, v2
; P8LE-NEXT: addi r3, r3, 4
; P8LE-NEXT: xxspltw v2, v2, 2
; P8LE-NEXT: lfiwzx f0, 0, r3
; P8LE-NEXT: xxswapd v3, f0
; P8LE-NEXT: vmrglw v2, v2, v3
; P8LE-NEXT: lxsiwzx v3, 0, r3
; P8LE-NEXT: vmrghw v2, v2, v3
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test_f4:
@ -304,10 +292,9 @@ entry:
define <2 x float> @s2v_test_f5(<2 x float> %vec, float* nocapture readonly %ptr1) {
; P9LE-LABEL: s2v_test_f5:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfiwzx f0, 0, r5
; P9LE-NEXT: xxspltw v2, v2, 2
; P9LE-NEXT: xxswapd v3, f0
; P9LE-NEXT: vmrglw v2, v2, v3
; P9LE-NEXT: lxsiwzx v3, 0, r5
; P9LE-NEXT: vmrglw v2, v2, v2
; P9LE-NEXT: vmrghw v2, v2, v3
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test_f5:
@ -320,10 +307,9 @@ define <2 x float> @s2v_test_f5(<2 x float> %vec, float* nocapture readonly %ptr
; P8LE-LABEL: s2v_test_f5:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: lfiwzx f0, 0, r5
; P8LE-NEXT: xxspltw v2, v2, 2
; P8LE-NEXT: xxswapd v3, f0
; P8LE-NEXT: vmrglw v2, v2, v3
; P8LE-NEXT: vmrglw v2, v2, v2
; P8LE-NEXT: lxsiwzx v3, 0, r5
; P8LE-NEXT: vmrghw v2, v2, v3
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test_f5:

View File

@ -13,60 +13,56 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) {
; P9LE: # %bb.0:
; P9LE-NEXT: li r3, 0
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: lis r5, -21386
; P9LE-NEXT: ori r5, r5, 37253
; P9LE-NEXT: extsh r4, r3
; P9LE-NEXT: mulhw r5, r4, r5
; P9LE-NEXT: add r4, r5, r4
; P9LE-NEXT: lis r4, -21386
; P9LE-NEXT: ori r4, r4, 37253
; P9LE-NEXT: extsh r3, r3
; P9LE-NEXT: mulhw r4, r3, r4
; P9LE-NEXT: add r4, r4, r3
; P9LE-NEXT: srwi r5, r4, 31
; P9LE-NEXT: srawi r4, r4, 6
; P9LE-NEXT: add r4, r4, r5
; P9LE-NEXT: lis r5, 31710
; P9LE-NEXT: mulli r4, r4, 95
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: mtfprd f0, r3
; P9LE-NEXT: lis r4, 31710
; P9LE-NEXT: mtvsrd v3, r3
; P9LE-NEXT: li r3, 2
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: extsh r4, r3
; P9LE-NEXT: ori r5, r5, 63421
; P9LE-NEXT: mulhw r5, r4, r5
; P9LE-NEXT: sub r4, r5, r4
; P9LE-NEXT: extsh r3, r3
; P9LE-NEXT: ori r4, r4, 63421
; P9LE-NEXT: mulhw r4, r3, r4
; P9LE-NEXT: sub r4, r4, r3
; P9LE-NEXT: srwi r5, r4, 31
; P9LE-NEXT: srawi r4, r4, 6
; P9LE-NEXT: add r4, r4, r5
; P9LE-NEXT: lis r5, 21399
; P9LE-NEXT: mulli r4, r4, -124
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: xxswapd v3, vs0
; P9LE-NEXT: mtfprd f0, r3
; P9LE-NEXT: lis r4, 21399
; P9LE-NEXT: mtvsrd v4, r3
; P9LE-NEXT: li r3, 4
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: extsh r4, r3
; P9LE-NEXT: ori r5, r5, 33437
; P9LE-NEXT: mulhw r4, r4, r5
; P9LE-NEXT: extsh r3, r3
; P9LE-NEXT: ori r4, r4, 33437
; P9LE-NEXT: mulhw r4, r3, r4
; P9LE-NEXT: srwi r5, r4, 31
; P9LE-NEXT: srawi r4, r4, 5
; P9LE-NEXT: add r4, r4, r5
; P9LE-NEXT: lis r5, -16728
; P9LE-NEXT: mulli r4, r4, 98
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: xxswapd v4, vs0
; P9LE-NEXT: mtfprd f0, r3
; P9LE-NEXT: vmrghh v3, v4, v3
; P9LE-NEXT: mtvsrd v4, r3
; P9LE-NEXT: li r3, 6
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: extsh r4, r3
; P9LE-NEXT: ori r5, r5, 63249
; P9LE-NEXT: mulhw r4, r4, r5
; P9LE-NEXT: lis r4, -16728
; P9LE-NEXT: ori r4, r4, 63249
; P9LE-NEXT: extsh r3, r3
; P9LE-NEXT: mulhw r4, r3, r4
; P9LE-NEXT: srwi r5, r4, 31
; P9LE-NEXT: srawi r4, r4, 8
; P9LE-NEXT: add r4, r4, r5
; P9LE-NEXT: mulli r4, r4, -1003
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: vmrglh v3, v4, v3
; P9LE-NEXT: xxswapd v4, vs0
; P9LE-NEXT: mtfprd f0, r3
; P9LE-NEXT: xxswapd v2, vs0
; P9LE-NEXT: vmrglh v2, v2, v4
; P9LE-NEXT: mtvsrd v2, r3
; P9LE-NEXT: vmrghh v2, v2, v4
; P9LE-NEXT: vmrglw v2, v2, v3
; P9LE-NEXT: blr
;
@ -135,58 +131,54 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) {
; P8LE: # %bb.0:
; P8LE-NEXT: xxswapd vs0, v2
; P8LE-NEXT: lis r3, 21399
; P8LE-NEXT: lis r9, -21386
; P8LE-NEXT: lis r11, 31710
; P8LE-NEXT: lis r8, -16728
; P8LE-NEXT: lis r9, -21386
; P8LE-NEXT: lis r10, 31710
; P8LE-NEXT: ori r3, r3, 33437
; P8LE-NEXT: ori r9, r9, 37253
; P8LE-NEXT: ori r8, r8, 63249
; P8LE-NEXT: ori r9, r9, 37253
; P8LE-NEXT: ori r10, r10, 63421
; P8LE-NEXT: mffprd r4, f0
; P8LE-NEXT: rldicl r5, r4, 32, 48
; P8LE-NEXT: clrldi r7, r4, 48
; P8LE-NEXT: rldicl r6, r4, 16, 48
; P8LE-NEXT: clrldi r7, r4, 48
; P8LE-NEXT: extsh r5, r5
; P8LE-NEXT: extsh r6, r6
; P8LE-NEXT: rldicl r4, r4, 48, 48
; P8LE-NEXT: extsh r10, r5
; P8LE-NEXT: extsh r0, r7
; P8LE-NEXT: mulhw r3, r10, r3
; P8LE-NEXT: ori r10, r11, 63421
; P8LE-NEXT: extsh r11, r4
; P8LE-NEXT: extsh r12, r6
; P8LE-NEXT: mulhw r9, r0, r9
; P8LE-NEXT: mulhw r10, r11, r10
; P8LE-NEXT: mulhw r8, r12, r8
; P8LE-NEXT: srwi r12, r3, 31
; P8LE-NEXT: extsh r7, r7
; P8LE-NEXT: mulhw r3, r5, r3
; P8LE-NEXT: extsh r4, r4
; P8LE-NEXT: mulhw r8, r6, r8
; P8LE-NEXT: mulhw r9, r7, r9
; P8LE-NEXT: mulhw r10, r4, r10
; P8LE-NEXT: srwi r11, r3, 31
; P8LE-NEXT: srawi r3, r3, 5
; P8LE-NEXT: add r9, r9, r0
; P8LE-NEXT: sub r10, r10, r11
; P8LE-NEXT: add r3, r3, r12
; P8LE-NEXT: add r3, r3, r11
; P8LE-NEXT: srwi r11, r8, 31
; P8LE-NEXT: add r9, r9, r7
; P8LE-NEXT: srawi r8, r8, 8
; P8LE-NEXT: sub r10, r10, r4
; P8LE-NEXT: add r8, r8, r11
; P8LE-NEXT: srwi r11, r9, 31
; P8LE-NEXT: srawi r9, r9, 6
; P8LE-NEXT: srwi r12, r8, 31
; P8LE-NEXT: srawi r8, r8, 8
; P8LE-NEXT: mulli r3, r3, 98
; P8LE-NEXT: add r9, r9, r11
; P8LE-NEXT: srwi r11, r10, 31
; P8LE-NEXT: srawi r10, r10, 6
; P8LE-NEXT: add r8, r8, r12
; P8LE-NEXT: mulli r3, r3, 98
; P8LE-NEXT: add r10, r10, r11
; P8LE-NEXT: mulli r8, r8, -1003
; P8LE-NEXT: add r10, r10, r11
; P8LE-NEXT: mulli r9, r9, 95
; P8LE-NEXT: mulli r10, r10, -124
; P8LE-NEXT: sub r3, r5, r3
; P8LE-NEXT: mtvsrd v2, r3
; P8LE-NEXT: sub r5, r6, r8
; P8LE-NEXT: mtfprd f0, r3
; P8LE-NEXT: sub r3, r7, r9
; P8LE-NEXT: mtvsrd v3, r5
; P8LE-NEXT: sub r4, r4, r10
; P8LE-NEXT: mtfprd f1, r5
; P8LE-NEXT: mtfprd f2, r3
; P8LE-NEXT: xxswapd v2, vs0
; P8LE-NEXT: mtfprd f3, r4
; P8LE-NEXT: xxswapd v3, vs1
; P8LE-NEXT: xxswapd v4, vs2
; P8LE-NEXT: xxswapd v5, vs3
; P8LE-NEXT: vmrglh v2, v3, v2
; P8LE-NEXT: vmrglh v3, v5, v4
; P8LE-NEXT: mtvsrd v4, r3
; P8LE-NEXT: mtvsrd v5, r4
; P8LE-NEXT: vmrghh v2, v3, v2
; P8LE-NEXT: vmrghh v3, v5, v4
; P8LE-NEXT: vmrglw v2, v2, v3
; P8LE-NEXT: blr
;
@ -256,56 +248,52 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) {
; P9LE: # %bb.0:
; P9LE-NEXT: li r3, 0
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: lis r5, -21386
; P9LE-NEXT: ori r5, r5, 37253
; P9LE-NEXT: extsh r4, r3
; P9LE-NEXT: mulhw r6, r4, r5
; P9LE-NEXT: add r4, r6, r4
; P9LE-NEXT: srwi r6, r4, 31
; P9LE-NEXT: srawi r4, r4, 6
; P9LE-NEXT: add r4, r4, r6
; P9LE-NEXT: mulli r4, r4, 95
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: mtfprd f0, r3
; P9LE-NEXT: lis r4, -21386
; P9LE-NEXT: ori r4, r4, 37253
; P9LE-NEXT: extsh r3, r3
; P9LE-NEXT: mulhw r5, r3, r4
; P9LE-NEXT: add r5, r5, r3
; P9LE-NEXT: srwi r6, r5, 31
; P9LE-NEXT: srawi r5, r5, 6
; P9LE-NEXT: add r5, r5, r6
; P9LE-NEXT: mulli r5, r5, 95
; P9LE-NEXT: sub r3, r3, r5
; P9LE-NEXT: mtvsrd v3, r3
; P9LE-NEXT: li r3, 2
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: extsh r4, r3
; P9LE-NEXT: mulhw r6, r4, r5
; P9LE-NEXT: add r4, r6, r4
; P9LE-NEXT: srwi r6, r4, 31
; P9LE-NEXT: srawi r4, r4, 6
; P9LE-NEXT: add r4, r4, r6
; P9LE-NEXT: mulli r4, r4, 95
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: xxswapd v3, vs0
; P9LE-NEXT: mtfprd f0, r3
; P9LE-NEXT: extsh r3, r3
; P9LE-NEXT: mulhw r5, r3, r4
; P9LE-NEXT: add r5, r5, r3
; P9LE-NEXT: srwi r6, r5, 31
; P9LE-NEXT: srawi r5, r5, 6
; P9LE-NEXT: add r5, r5, r6
; P9LE-NEXT: mulli r5, r5, 95
; P9LE-NEXT: sub r3, r3, r5
; P9LE-NEXT: mtvsrd v4, r3
; P9LE-NEXT: li r3, 4
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: extsh r4, r3
; P9LE-NEXT: mulhw r6, r4, r5
; P9LE-NEXT: add r4, r6, r4
; P9LE-NEXT: srwi r6, r4, 31
; P9LE-NEXT: srawi r4, r4, 6
; P9LE-NEXT: add r4, r4, r6
; P9LE-NEXT: mulli r4, r4, 95
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: xxswapd v4, vs0
; P9LE-NEXT: mtfprd f0, r3
; P9LE-NEXT: extsh r3, r3
; P9LE-NEXT: mulhw r5, r3, r4
; P9LE-NEXT: add r5, r5, r3
; P9LE-NEXT: srwi r6, r5, 31
; P9LE-NEXT: srawi r5, r5, 6
; P9LE-NEXT: add r5, r5, r6
; P9LE-NEXT: mulli r5, r5, 95
; P9LE-NEXT: sub r3, r3, r5
; P9LE-NEXT: vmrghh v3, v4, v3
; P9LE-NEXT: mtvsrd v4, r3
; P9LE-NEXT: li r3, 6
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: extsh r4, r3
; P9LE-NEXT: mulhw r5, r4, r5
; P9LE-NEXT: add r4, r5, r4
; P9LE-NEXT: extsh r3, r3
; P9LE-NEXT: mulhw r4, r3, r4
; P9LE-NEXT: add r4, r4, r3
; P9LE-NEXT: srwi r5, r4, 31
; P9LE-NEXT: srawi r4, r4, 6
; P9LE-NEXT: add r4, r4, r5
; P9LE-NEXT: mulli r4, r4, 95
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: vmrglh v3, v4, v3
; P9LE-NEXT: xxswapd v4, vs0
; P9LE-NEXT: mtfprd f0, r3
; P9LE-NEXT: xxswapd v2, vs0
; P9LE-NEXT: vmrglh v2, v2, v4
; P9LE-NEXT: mtvsrd v2, r3
; P9LE-NEXT: vmrghh v2, v2, v4
; P9LE-NEXT: vmrglw v2, v2, v3
; P9LE-NEXT: blr
;
@ -370,56 +358,50 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) {
; P8LE: # %bb.0:
; P8LE-NEXT: xxswapd vs0, v2
; P8LE-NEXT: lis r3, -21386
; P8LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; P8LE-NEXT: ori r3, r3, 37253
; P8LE-NEXT: mffprd r4, f0
; P8LE-NEXT: clrldi r5, r4, 48
; P8LE-NEXT: rldicl r6, r4, 48, 48
; P8LE-NEXT: extsh r8, r5
; P8LE-NEXT: extsh r5, r5
; P8LE-NEXT: rldicl r7, r4, 32, 48
; P8LE-NEXT: extsh r9, r6
; P8LE-NEXT: mulhw r10, r8, r3
; P8LE-NEXT: extsh r6, r6
; P8LE-NEXT: mulhw r8, r5, r3
; P8LE-NEXT: rldicl r4, r4, 16, 48
; P8LE-NEXT: extsh r11, r7
; P8LE-NEXT: mulhw r12, r9, r3
; P8LE-NEXT: extsh r0, r4
; P8LE-NEXT: mulhw r30, r11, r3
; P8LE-NEXT: mulhw r3, r0, r3
; P8LE-NEXT: add r8, r10, r8
; P8LE-NEXT: add r9, r12, r9
; P8LE-NEXT: srwi r10, r8, 31
; P8LE-NEXT: extsh r7, r7
; P8LE-NEXT: mulhw r9, r6, r3
; P8LE-NEXT: extsh r4, r4
; P8LE-NEXT: mulhw r10, r7, r3
; P8LE-NEXT: mulhw r3, r4, r3
; P8LE-NEXT: add r8, r8, r5
; P8LE-NEXT: add r9, r9, r6
; P8LE-NEXT: srwi r11, r8, 31
; P8LE-NEXT: srawi r8, r8, 6
; P8LE-NEXT: add r11, r30, r11
; P8LE-NEXT: add r3, r3, r0
; P8LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; P8LE-NEXT: add r8, r8, r10
; P8LE-NEXT: srwi r10, r9, 31
; P8LE-NEXT: add r10, r10, r7
; P8LE-NEXT: add r3, r3, r4
; P8LE-NEXT: add r8, r8, r11
; P8LE-NEXT: srwi r11, r9, 31
; P8LE-NEXT: srawi r9, r9, 6
; P8LE-NEXT: mulli r8, r8, 95
; P8LE-NEXT: add r9, r9, r10
; P8LE-NEXT: srwi r10, r11, 31
; P8LE-NEXT: srawi r11, r11, 6
; P8LE-NEXT: add r9, r9, r11
; P8LE-NEXT: srwi r11, r10, 31
; P8LE-NEXT: srawi r10, r10, 6
; P8LE-NEXT: mulli r9, r9, 95
; P8LE-NEXT: add r10, r11, r10
; P8LE-NEXT: add r10, r10, r11
; P8LE-NEXT: srwi r11, r3, 31
; P8LE-NEXT: srawi r3, r3, 6
; P8LE-NEXT: mulli r10, r10, 95
; P8LE-NEXT: sub r5, r5, r8
; P8LE-NEXT: add r3, r3, r11
; P8LE-NEXT: mtfprd f0, r5
; P8LE-NEXT: mtvsrd v2, r5
; P8LE-NEXT: mulli r3, r3, 95
; P8LE-NEXT: sub r6, r6, r9
; P8LE-NEXT: mtfprd f1, r6
; P8LE-NEXT: xxswapd v2, vs0
; P8LE-NEXT: mtvsrd v3, r6
; P8LE-NEXT: sub r5, r7, r10
; P8LE-NEXT: mtfprd f2, r5
; P8LE-NEXT: xxswapd v3, vs1
; P8LE-NEXT: mtvsrd v4, r5
; P8LE-NEXT: sub r3, r4, r3
; P8LE-NEXT: mtfprd f3, r3
; P8LE-NEXT: xxswapd v4, vs2
; P8LE-NEXT: vmrglh v2, v3, v2
; P8LE-NEXT: xxswapd v5, vs3
; P8LE-NEXT: vmrglh v3, v5, v4
; P8LE-NEXT: vmrghh v2, v3, v2
; P8LE-NEXT: mtvsrd v5, r3
; P8LE-NEXT: vmrghh v3, v5, v4
; P8LE-NEXT: vmrglw v2, v3, v2
; P8LE-NEXT: blr
;
@ -487,67 +469,59 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) {
; P9LE: # %bb.0:
; P9LE-NEXT: li r3, 0
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: lis r5, -21386
; P9LE-NEXT: ori r5, r5, 37253
; P9LE-NEXT: extsh r4, r3
; P9LE-NEXT: mulhw r6, r4, r5
; P9LE-NEXT: add r4, r6, r4
; P9LE-NEXT: srwi r6, r4, 31
; P9LE-NEXT: srawi r4, r4, 6
; P9LE-NEXT: add r4, r4, r6
; P9LE-NEXT: mulli r6, r4, 95
; P9LE-NEXT: lis r4, -21386
; P9LE-NEXT: ori r4, r4, 37253
; P9LE-NEXT: extsh r3, r3
; P9LE-NEXT: mulhw r5, r3, r4
; P9LE-NEXT: add r5, r5, r3
; P9LE-NEXT: srwi r6, r5, 31
; P9LE-NEXT: srawi r5, r5, 6
; P9LE-NEXT: add r5, r5, r6
; P9LE-NEXT: mulli r6, r5, 95
; P9LE-NEXT: sub r3, r3, r6
; P9LE-NEXT: mtfprd f0, r3
; P9LE-NEXT: mtvsrd v3, r3
; P9LE-NEXT: li r3, 2
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: extsh r6, r3
; P9LE-NEXT: mulhw r7, r6, r5
; P9LE-NEXT: mulhw r7, r6, r4
; P9LE-NEXT: add r6, r7, r6
; P9LE-NEXT: srwi r7, r6, 31
; P9LE-NEXT: srawi r6, r6, 6
; P9LE-NEXT: add r6, r6, r7
; P9LE-NEXT: mulli r7, r6, 95
; P9LE-NEXT: sub r3, r3, r7
; P9LE-NEXT: xxswapd v3, vs0
; P9LE-NEXT: mtfprd f0, r3
; P9LE-NEXT: mtvsrd v4, r3
; P9LE-NEXT: li r3, 4
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: extsh r7, r3
; P9LE-NEXT: mulhw r8, r7, r5
; P9LE-NEXT: mulhw r8, r7, r4
; P9LE-NEXT: add r7, r8, r7
; P9LE-NEXT: srwi r8, r7, 31
; P9LE-NEXT: srawi r7, r7, 6
; P9LE-NEXT: add r7, r7, r8
; P9LE-NEXT: mulli r8, r7, 95
; P9LE-NEXT: sub r3, r3, r8
; P9LE-NEXT: xxswapd v4, vs0
; P9LE-NEXT: mtfprd f0, r3
; P9LE-NEXT: vmrghh v3, v4, v3
; P9LE-NEXT: mtvsrd v4, r3
; P9LE-NEXT: li r3, 6
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: extsh r8, r3
; P9LE-NEXT: mulhw r5, r8, r5
; P9LE-NEXT: add r5, r5, r8
; P9LE-NEXT: srwi r8, r5, 31
; P9LE-NEXT: srawi r5, r5, 6
; P9LE-NEXT: add r5, r5, r8
; P9LE-NEXT: mulli r8, r5, 95
; P9LE-NEXT: mulhw r4, r8, r4
; P9LE-NEXT: add r4, r4, r8
; P9LE-NEXT: srwi r8, r4, 31
; P9LE-NEXT: srawi r4, r4, 6
; P9LE-NEXT: add r4, r4, r8
; P9LE-NEXT: mulli r8, r4, 95
; P9LE-NEXT: sub r3, r3, r8
; P9LE-NEXT: vmrglh v3, v4, v3
; P9LE-NEXT: xxswapd v4, vs0
; P9LE-NEXT: mtfprd f0, r3
; P9LE-NEXT: xxswapd v2, vs0
; P9LE-NEXT: mtfprd f0, r4
; P9LE-NEXT: vmrglh v2, v2, v4
; P9LE-NEXT: mtvsrd v2, r3
; P9LE-NEXT: vmrghh v2, v2, v4
; P9LE-NEXT: mtvsrd v4, r6
; P9LE-NEXT: vmrglw v2, v2, v3
; P9LE-NEXT: xxswapd v3, vs0
; P9LE-NEXT: mtfprd f0, r6
; P9LE-NEXT: xxswapd v4, vs0
; P9LE-NEXT: mtfprd f0, r7
; P9LE-NEXT: vmrglh v3, v4, v3
; P9LE-NEXT: xxswapd v4, vs0
; P9LE-NEXT: mtfprd f0, r5
; P9LE-NEXT: xxswapd v5, vs0
; P9LE-NEXT: vmrglh v4, v5, v4
; P9LE-NEXT: mtvsrd v3, r5
; P9LE-NEXT: vmrghh v3, v4, v3
; P9LE-NEXT: mtvsrd v4, r7
; P9LE-NEXT: mtvsrd v5, r4
; P9LE-NEXT: vmrghh v4, v5, v4
; P9LE-NEXT: vmrglw v3, v4, v3
; P9LE-NEXT: vadduhm v2, v2, v3
; P9LE-NEXT: blr
@ -624,69 +598,59 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) {
; P8LE-LABEL: combine_srem_sdiv:
; P8LE: # %bb.0:
; P8LE-NEXT: xxswapd vs0, v2
; P8LE-NEXT: lis r4, -21386
; P8LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; P8LE-NEXT: ori r4, r4, 37253
; P8LE-NEXT: mffprd r5, f0
; P8LE-NEXT: clrldi r3, r5, 48
; P8LE-NEXT: rldicl r6, r5, 48, 48
; P8LE-NEXT: rldicl r7, r5, 32, 48
; P8LE-NEXT: extsh r8, r3
; P8LE-NEXT: extsh r9, r6
; P8LE-NEXT: extsh r10, r7
; P8LE-NEXT: mulhw r11, r8, r4
; P8LE-NEXT: rldicl r5, r5, 16, 48
; P8LE-NEXT: mulhw r12, r9, r4
; P8LE-NEXT: mulhw r0, r10, r4
; P8LE-NEXT: extsh r30, r5
; P8LE-NEXT: mulhw r4, r30, r4
; P8LE-NEXT: lis r3, -21386
; P8LE-NEXT: ori r3, r3, 37253
; P8LE-NEXT: mffprd r4, f0
; P8LE-NEXT: clrldi r5, r4, 48
; P8LE-NEXT: rldicl r6, r4, 48, 48
; P8LE-NEXT: rldicl r7, r4, 32, 48
; P8LE-NEXT: extsh r5, r5
; P8LE-NEXT: extsh r8, r6
; P8LE-NEXT: extsh r9, r7
; P8LE-NEXT: mulhw r10, r5, r3
; P8LE-NEXT: mulhw r11, r8, r3
; P8LE-NEXT: rldicl r4, r4, 16, 48
; P8LE-NEXT: mulhw r12, r9, r3
; P8LE-NEXT: extsh r0, r4
; P8LE-NEXT: mulhw r3, r0, r3
; P8LE-NEXT: add r10, r10, r5
; P8LE-NEXT: add r8, r11, r8
; P8LE-NEXT: srwi r11, r10, 31
; P8LE-NEXT: add r9, r12, r9
; P8LE-NEXT: srwi r11, r8, 31
; P8LE-NEXT: add r10, r0, r10
; P8LE-NEXT: srawi r8, r8, 6
; P8LE-NEXT: srawi r12, r9, 6
; P8LE-NEXT: srawi r10, r10, 6
; P8LE-NEXT: srawi r12, r8, 6
; P8LE-NEXT: srwi r8, r8, 31
; P8LE-NEXT: add r10, r10, r11
; P8LE-NEXT: add r3, r3, r0
; P8LE-NEXT: srawi r11, r9, 6
; P8LE-NEXT: srwi r9, r9, 31
; P8LE-NEXT: add r8, r8, r11
; P8LE-NEXT: add r4, r4, r30
; P8LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; P8LE-NEXT: srawi r11, r10, 6
; P8LE-NEXT: srwi r10, r10, 31
; P8LE-NEXT: add r9, r12, r9
; P8LE-NEXT: mtfprd f0, r8
; P8LE-NEXT: mulli r12, r8, 95
; P8LE-NEXT: add r10, r11, r10
; P8LE-NEXT: srwi r8, r4, 31
; P8LE-NEXT: mtfprd f1, r9
; P8LE-NEXT: srawi r4, r4, 6
; P8LE-NEXT: mulli r11, r9, 95
; P8LE-NEXT: xxswapd v2, vs0
; P8LE-NEXT: mtfprd f2, r10
; P8LE-NEXT: mulli r9, r10, 95
; P8LE-NEXT: add r4, r4, r8
; P8LE-NEXT: xxswapd v3, vs1
; P8LE-NEXT: mtfprd f3, r4
; P8LE-NEXT: mulli r4, r4, 95
; P8LE-NEXT: xxswapd v1, vs2
; P8LE-NEXT: sub r3, r3, r12
; P8LE-NEXT: mtfprd f0, r3
; P8LE-NEXT: sub r6, r6, r11
; P8LE-NEXT: xxswapd v6, vs3
; P8LE-NEXT: sub r3, r7, r9
; P8LE-NEXT: mtfprd f1, r6
; P8LE-NEXT: mtfprd f4, r3
; P8LE-NEXT: sub r3, r5, r4
; P8LE-NEXT: mtfprd f5, r3
; P8LE-NEXT: xxswapd v4, vs1
; P8LE-NEXT: vmrglh v2, v3, v2
; P8LE-NEXT: xxswapd v3, vs0
; P8LE-NEXT: xxswapd v5, vs4
; P8LE-NEXT: xxswapd v0, vs5
; P8LE-NEXT: vmrglh v3, v4, v3
; P8LE-NEXT: vmrglh v4, v0, v5
; P8LE-NEXT: vmrglh v5, v6, v1
; P8LE-NEXT: vmrglw v3, v4, v3
; P8LE-NEXT: vmrglw v2, v5, v2
; P8LE-NEXT: add r8, r12, r8
; P8LE-NEXT: mtvsrd v2, r10
; P8LE-NEXT: mulli r12, r10, 95
; P8LE-NEXT: add r9, r11, r9
; P8LE-NEXT: srwi r11, r3, 31
; P8LE-NEXT: mtvsrd v3, r8
; P8LE-NEXT: srawi r3, r3, 6
; P8LE-NEXT: mulli r10, r8, 95
; P8LE-NEXT: mtvsrd v4, r9
; P8LE-NEXT: add r3, r3, r11
; P8LE-NEXT: mulli r8, r9, 95
; P8LE-NEXT: vmrghh v2, v3, v2
; P8LE-NEXT: mulli r9, r3, 95
; P8LE-NEXT: sub r5, r5, r12
; P8LE-NEXT: sub r6, r6, r10
; P8LE-NEXT: mtvsrd v3, r5
; P8LE-NEXT: mtvsrd v5, r6
; P8LE-NEXT: sub r5, r7, r8
; P8LE-NEXT: sub r4, r4, r9
; P8LE-NEXT: mtvsrd v0, r5
; P8LE-NEXT: mtvsrd v1, r4
; P8LE-NEXT: vmrghh v3, v5, v3
; P8LE-NEXT: mtvsrd v5, r3
; P8LE-NEXT: vmrghh v0, v1, v0
; P8LE-NEXT: vmrghh v4, v5, v4
; P8LE-NEXT: vmrglw v3, v0, v3
; P8LE-NEXT: vmrglw v2, v4, v2
; P8LE-NEXT: vadduhm v2, v3, v2
; P8LE-NEXT: blr
;
@ -767,47 +731,43 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) {
; P9LE: # %bb.0:
; P9LE-NEXT: li r3, 0
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: extsh r4, r3
; P9LE-NEXT: srawi r4, r4, 6
; P9LE-NEXT: extsh r3, r3
; P9LE-NEXT: srawi r4, r3, 6
; P9LE-NEXT: addze r4, r4
; P9LE-NEXT: slwi r4, r4, 6
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: mtfprd f0, r3
; P9LE-NEXT: mtvsrd v3, r3
; P9LE-NEXT: li r3, 2
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: extsh r4, r3
; P9LE-NEXT: srawi r4, r4, 5
; P9LE-NEXT: extsh r3, r3
; P9LE-NEXT: srawi r4, r3, 5
; P9LE-NEXT: addze r4, r4
; P9LE-NEXT: slwi r4, r4, 5
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: xxswapd v3, vs0
; P9LE-NEXT: mtfprd f0, r3
; P9LE-NEXT: lis r4, -21386
; P9LE-NEXT: mtvsrd v4, r3
; P9LE-NEXT: li r3, 6
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: lis r5, -21386
; P9LE-NEXT: ori r5, r5, 37253
; P9LE-NEXT: xxswapd v4, vs0
; P9LE-NEXT: extsh r4, r3
; P9LE-NEXT: mulhw r5, r4, r5
; P9LE-NEXT: add r4, r5, r4
; P9LE-NEXT: extsh r3, r3
; P9LE-NEXT: ori r4, r4, 37253
; P9LE-NEXT: mulhw r4, r3, r4
; P9LE-NEXT: add r4, r4, r3
; P9LE-NEXT: srwi r5, r4, 31
; P9LE-NEXT: srawi r4, r4, 6
; P9LE-NEXT: add r4, r4, r5
; P9LE-NEXT: mulli r4, r4, 95
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: mtfprd f0, r3
; P9LE-NEXT: vmrghh v3, v4, v3
; P9LE-NEXT: mtvsrd v4, r3
; P9LE-NEXT: li r3, 4
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: extsh r4, r3
; P9LE-NEXT: srawi r4, r4, 3
; P9LE-NEXT: extsh r3, r3
; P9LE-NEXT: srawi r4, r3, 3
; P9LE-NEXT: addze r4, r4
; P9LE-NEXT: slwi r4, r4, 3
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: vmrglh v3, v4, v3
; P9LE-NEXT: xxswapd v4, vs0
; P9LE-NEXT: mtfprd f0, r3
; P9LE-NEXT: xxswapd v2, vs0
; P9LE-NEXT: vmrglh v2, v4, v2
; P9LE-NEXT: mtvsrd v2, r3
; P9LE-NEXT: vmrghh v2, v4, v2
; P9LE-NEXT: vmrglw v2, v2, v3
; P9LE-NEXT: blr
;
@ -866,42 +826,38 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) {
; P8LE-NEXT: ori r3, r3, 37253
; P8LE-NEXT: mffprd r4, f0
; P8LE-NEXT: rldicl r5, r4, 16, 48
; P8LE-NEXT: clrldi r7, r4, 48
; P8LE-NEXT: extsh r6, r5
; P8LE-NEXT: extsh r8, r7
; P8LE-NEXT: mulhw r3, r6, r3
; P8LE-NEXT: rldicl r9, r4, 48, 48
; P8LE-NEXT: srawi r8, r8, 6
; P8LE-NEXT: extsh r10, r9
; P8LE-NEXT: clrldi r6, r4, 48
; P8LE-NEXT: extsh r5, r5
; P8LE-NEXT: extsh r6, r6
; P8LE-NEXT: mulhw r3, r5, r3
; P8LE-NEXT: rldicl r7, r4, 48, 48
; P8LE-NEXT: srawi r8, r6, 6
; P8LE-NEXT: extsh r7, r7
; P8LE-NEXT: addze r8, r8
; P8LE-NEXT: rldicl r4, r4, 32, 48
; P8LE-NEXT: srawi r10, r10, 5
; P8LE-NEXT: srawi r9, r7, 5
; P8LE-NEXT: extsh r4, r4
; P8LE-NEXT: slwi r8, r8, 6
; P8LE-NEXT: add r3, r3, r6
; P8LE-NEXT: addze r6, r10
; P8LE-NEXT: sub r7, r7, r8
; P8LE-NEXT: add r3, r3, r5
; P8LE-NEXT: addze r9, r9
; P8LE-NEXT: sub r6, r6, r8
; P8LE-NEXT: srwi r10, r3, 31
; P8LE-NEXT: srawi r3, r3, 6
; P8LE-NEXT: mtfprd f0, r7
; P8LE-NEXT: slwi r6, r6, 5
; P8LE-NEXT: slwi r8, r9, 5
; P8LE-NEXT: mtvsrd v2, r6
; P8LE-NEXT: add r3, r3, r10
; P8LE-NEXT: extsh r10, r4
; P8LE-NEXT: sub r6, r9, r6
; P8LE-NEXT: srawi r9, r4, 3
; P8LE-NEXT: sub r6, r7, r8
; P8LE-NEXT: mulli r3, r3, 95
; P8LE-NEXT: srawi r8, r10, 3
; P8LE-NEXT: mtfprd f1, r6
; P8LE-NEXT: xxswapd v2, vs0
; P8LE-NEXT: addze r7, r8
; P8LE-NEXT: xxswapd v3, vs1
; P8LE-NEXT: addze r7, r9
; P8LE-NEXT: mtvsrd v3, r6
; P8LE-NEXT: vmrghh v2, v3, v2
; P8LE-NEXT: sub r3, r5, r3
; P8LE-NEXT: slwi r5, r7, 3
; P8LE-NEXT: sub r4, r4, r5
; P8LE-NEXT: mtfprd f2, r3
; P8LE-NEXT: mtfprd f3, r4
; P8LE-NEXT: xxswapd v4, vs2
; P8LE-NEXT: vmrglh v2, v3, v2
; P8LE-NEXT: xxswapd v5, vs3
; P8LE-NEXT: vmrglh v3, v4, v5
; P8LE-NEXT: mtvsrd v4, r3
; P8LE-NEXT: mtvsrd v5, r4
; P8LE-NEXT: vmrghh v3, v4, v5
; P8LE-NEXT: vmrglw v2, v3, v2
; P8LE-NEXT: blr
;
@ -959,48 +915,46 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) {
; P9LE: # %bb.0:
; P9LE-NEXT: li r3, 2
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: lis r5, -14230
; P9LE-NEXT: ori r5, r5, 30865
; P9LE-NEXT: extsh r4, r3
; P9LE-NEXT: mulhw r5, r4, r5
; P9LE-NEXT: add r4, r5, r4
; P9LE-NEXT: lis r4, -14230
; P9LE-NEXT: ori r4, r4, 30865
; P9LE-NEXT: extsh r3, r3
; P9LE-NEXT: mulhw r4, r3, r4
; P9LE-NEXT: add r4, r4, r3
; P9LE-NEXT: srwi r5, r4, 31
; P9LE-NEXT: srawi r4, r4, 9
; P9LE-NEXT: add r4, r4, r5
; P9LE-NEXT: lis r5, -19946
; P9LE-NEXT: mulli r4, r4, 654
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: mtfprd f0, r3
; P9LE-NEXT: lis r4, -19946
; P9LE-NEXT: mtvsrd v3, r3
; P9LE-NEXT: li r3, 0
; P9LE-NEXT: mtvsrd v4, r3
; P9LE-NEXT: li r3, 4
; P9LE-NEXT: ori r5, r5, 17097
; P9LE-NEXT: xxlxor v3, v3, v3
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: extsh r4, r3
; P9LE-NEXT: mulhw r5, r4, r5
; P9LE-NEXT: add r4, r5, r4
; P9LE-NEXT: extsh r3, r3
; P9LE-NEXT: ori r4, r4, 17097
; P9LE-NEXT: mulhw r4, r3, r4
; P9LE-NEXT: add r4, r4, r3
; P9LE-NEXT: srwi r5, r4, 31
; P9LE-NEXT: srawi r4, r4, 4
; P9LE-NEXT: add r4, r4, r5
; P9LE-NEXT: lis r5, 24749
; P9LE-NEXT: mulli r4, r4, 23
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: xxswapd v4, vs0
; P9LE-NEXT: mtfprd f0, r3
; P9LE-NEXT: vmrghh v3, v3, v4
; P9LE-NEXT: mtvsrd v4, r3
; P9LE-NEXT: li r3, 6
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: extsh r4, r3
; P9LE-NEXT: ori r5, r5, 47143
; P9LE-NEXT: mulhw r4, r4, r5
; P9LE-NEXT: lis r4, 24749
; P9LE-NEXT: ori r4, r4, 47143
; P9LE-NEXT: extsh r3, r3
; P9LE-NEXT: mulhw r4, r3, r4
; P9LE-NEXT: srwi r5, r4, 31
; P9LE-NEXT: srawi r4, r4, 11
; P9LE-NEXT: add r4, r4, r5
; P9LE-NEXT: mulli r4, r4, 5423
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: vmrglh v3, v4, v3
; P9LE-NEXT: xxswapd v4, vs0
; P9LE-NEXT: mtfprd f0, r3
; P9LE-NEXT: xxswapd v2, vs0
; P9LE-NEXT: vmrglh v2, v2, v4
; P9LE-NEXT: mtvsrd v2, r3
; P9LE-NEXT: vmrghh v2, v2, v4
; P9LE-NEXT: vmrglw v2, v2, v3
; P9LE-NEXT: blr
;
@ -1058,49 +1012,47 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) {
; P8LE-LABEL: dont_fold_srem_one:
; P8LE: # %bb.0:
; P8LE-NEXT: xxswapd vs0, v2
; P8LE-NEXT: lis r3, 24749
; P8LE-NEXT: lis r7, -19946
; P8LE-NEXT: lis r9, -14230
; P8LE-NEXT: xxlxor v5, v5, v5
; P8LE-NEXT: ori r3, r3, 47143
; P8LE-NEXT: ori r7, r7, 17097
; P8LE-NEXT: mffprd r4, f0
; P8LE-NEXT: rldicl r5, r4, 16, 48
; P8LE-NEXT: rldicl r6, r4, 32, 48
; P8LE-NEXT: rldicl r4, r4, 48, 48
; P8LE-NEXT: extsh r8, r5
; P8LE-NEXT: extsh r10, r6
; P8LE-NEXT: mulhw r3, r8, r3
; P8LE-NEXT: ori r8, r9, 30865
; P8LE-NEXT: extsh r9, r4
; P8LE-NEXT: mulhw r7, r10, r7
; P8LE-NEXT: mulhw r8, r9, r8
; P8LE-NEXT: add r7, r7, r10
; P8LE-NEXT: srwi r10, r3, 31
; P8LE-NEXT: add r8, r8, r9
; P8LE-NEXT: srawi r3, r3, 11
; P8LE-NEXT: srwi r9, r7, 31
; P8LE-NEXT: srawi r7, r7, 4
; P8LE-NEXT: add r3, r3, r10
; P8LE-NEXT: add r7, r7, r9
; P8LE-NEXT: lis r5, 24749
; P8LE-NEXT: lis r6, -19946
; P8LE-NEXT: lis r8, -14230
; P8LE-NEXT: ori r5, r5, 47143
; P8LE-NEXT: ori r6, r6, 17097
; P8LE-NEXT: ori r8, r8, 30865
; P8LE-NEXT: mffprd r3, f0
; P8LE-NEXT: rldicl r4, r3, 16, 48
; P8LE-NEXT: rldicl r7, r3, 32, 48
; P8LE-NEXT: rldicl r3, r3, 48, 48
; P8LE-NEXT: extsh r4, r4
; P8LE-NEXT: extsh r7, r7
; P8LE-NEXT: extsh r3, r3
; P8LE-NEXT: mulhw r5, r4, r5
; P8LE-NEXT: mulhw r6, r7, r6
; P8LE-NEXT: mulhw r8, r3, r8
; P8LE-NEXT: srwi r9, r5, 31
; P8LE-NEXT: srawi r5, r5, 11
; P8LE-NEXT: add r6, r6, r7
; P8LE-NEXT: add r8, r8, r3
; P8LE-NEXT: add r5, r5, r9
; P8LE-NEXT: srwi r9, r6, 31
; P8LE-NEXT: srawi r6, r6, 4
; P8LE-NEXT: add r6, r6, r9
; P8LE-NEXT: srwi r9, r8, 31
; P8LE-NEXT: srawi r8, r8, 9
; P8LE-NEXT: mulli r3, r3, 5423
; P8LE-NEXT: mulli r5, r5, 5423
; P8LE-NEXT: add r8, r8, r9
; P8LE-NEXT: mulli r7, r7, 23
; P8LE-NEXT: mulli r6, r6, 23
; P8LE-NEXT: li r9, 0
; P8LE-NEXT: mulli r8, r8, 654
; P8LE-NEXT: sub r3, r5, r3
; P8LE-NEXT: mtfprd f0, r3
; P8LE-NEXT: sub r3, r6, r7
; P8LE-NEXT: sub r4, r4, r8
; P8LE-NEXT: mtfprd f1, r3
; P8LE-NEXT: mtfprd f2, r4
; P8LE-NEXT: xxswapd v2, vs0
; P8LE-NEXT: xxswapd v3, vs1
; P8LE-NEXT: xxswapd v4, vs2
; P8LE-NEXT: vmrglh v2, v2, v3
; P8LE-NEXT: vmrglh v3, v4, v5
; P8LE-NEXT: vmrglw v2, v2, v3
; P8LE-NEXT: mtvsrd v2, r9
; P8LE-NEXT: sub r4, r4, r5
; P8LE-NEXT: sub r5, r7, r6
; P8LE-NEXT: mtvsrd v3, r4
; P8LE-NEXT: sub r3, r3, r8
; P8LE-NEXT: mtvsrd v4, r5
; P8LE-NEXT: mtvsrd v5, r3
; P8LE-NEXT: vmrghh v3, v3, v4
; P8LE-NEXT: vmrghh v2, v5, v2
; P8LE-NEXT: vmrglw v2, v3, v2
; P8LE-NEXT: blr
;
; P8BE-LABEL: dont_fold_srem_one:
@ -1161,43 +1113,41 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) {
; P9LE: # %bb.0:
; P9LE-NEXT: li r3, 4
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: lis r5, -19946
; P9LE-NEXT: ori r5, r5, 17097
; P9LE-NEXT: extsh r4, r3
; P9LE-NEXT: mulhw r5, r4, r5
; P9LE-NEXT: add r4, r5, r4
; P9LE-NEXT: lis r4, -19946
; P9LE-NEXT: ori r4, r4, 17097
; P9LE-NEXT: extsh r3, r3
; P9LE-NEXT: mulhw r4, r3, r4
; P9LE-NEXT: add r4, r4, r3
; P9LE-NEXT: srwi r5, r4, 31
; P9LE-NEXT: srawi r4, r4, 4
; P9LE-NEXT: add r4, r4, r5
; P9LE-NEXT: lis r5, 24749
; P9LE-NEXT: mulli r4, r4, 23
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: mtfprd f0, r3
; P9LE-NEXT: lis r4, 24749
; P9LE-NEXT: mtvsrd v3, r3
; P9LE-NEXT: li r3, 6
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: extsh r4, r3
; P9LE-NEXT: ori r5, r5, 47143
; P9LE-NEXT: mulhw r4, r4, r5
; P9LE-NEXT: extsh r3, r3
; P9LE-NEXT: ori r4, r4, 47143
; P9LE-NEXT: mulhw r4, r3, r4
; P9LE-NEXT: srwi r5, r4, 31
; P9LE-NEXT: srawi r4, r4, 11
; P9LE-NEXT: add r4, r4, r5
; P9LE-NEXT: mulli r4, r4, 5423
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: xxswapd v3, vs0
; P9LE-NEXT: mtfprd f0, r3
; P9LE-NEXT: mtvsrd v4, r3
; P9LE-NEXT: li r3, 2
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: extsh r4, r3
; P9LE-NEXT: srawi r4, r4, 15
; P9LE-NEXT: extsh r3, r3
; P9LE-NEXT: srawi r4, r3, 15
; P9LE-NEXT: addze r4, r4
; P9LE-NEXT: slwi r4, r4, 15
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: xxswapd v4, vs0
; P9LE-NEXT: mtfprd f0, r3
; P9LE-NEXT: xxswapd v2, vs0
; P9LE-NEXT: vmrglh v3, v4, v3
; P9LE-NEXT: xxlxor v4, v4, v4
; P9LE-NEXT: vmrglh v2, v2, v4
; P9LE-NEXT: mtvsrd v2, r3
; P9LE-NEXT: li r3, 0
; P9LE-NEXT: vmrghh v3, v4, v3
; P9LE-NEXT: mtvsrd v4, r3
; P9LE-NEXT: vmrghh v2, v2, v4
; P9LE-NEXT: vmrglw v2, v3, v2
; P9LE-NEXT: blr
;
@ -1252,42 +1202,40 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) {
; P8LE-NEXT: xxswapd vs0, v2
; P8LE-NEXT: lis r4, 24749
; P8LE-NEXT: lis r5, -19946
; P8LE-NEXT: xxlxor v5, v5, v5
; P8LE-NEXT: ori r4, r4, 47143
; P8LE-NEXT: ori r5, r5, 17097
; P8LE-NEXT: mffprd r3, f0
; P8LE-NEXT: rldicl r6, r3, 16, 48
; P8LE-NEXT: rldicl r7, r3, 32, 48
; P8LE-NEXT: extsh r8, r6
; P8LE-NEXT: extsh r9, r7
; P8LE-NEXT: mulhw r4, r8, r4
; P8LE-NEXT: mulhw r5, r9, r5
; P8LE-NEXT: extsh r6, r6
; P8LE-NEXT: extsh r7, r7
; P8LE-NEXT: mulhw r4, r6, r4
; P8LE-NEXT: mulhw r5, r7, r5
; P8LE-NEXT: rldicl r3, r3, 48, 48
; P8LE-NEXT: extsh r3, r3
; P8LE-NEXT: srwi r8, r4, 31
; P8LE-NEXT: srawi r4, r4, 11
; P8LE-NEXT: add r5, r5, r9
; P8LE-NEXT: add r5, r5, r7
; P8LE-NEXT: add r4, r4, r8
; P8LE-NEXT: srwi r8, r5, 31
; P8LE-NEXT: srawi r5, r5, 4
; P8LE-NEXT: mulli r4, r4, 5423
; P8LE-NEXT: add r5, r5, r8
; P8LE-NEXT: extsh r8, r3
; P8LE-NEXT: srawi r9, r3, 15
; P8LE-NEXT: li r8, 0
; P8LE-NEXT: mulli r5, r5, 23
; P8LE-NEXT: srawi r8, r8, 15
; P8LE-NEXT: mtvsrd v2, r8
; P8LE-NEXT: sub r4, r6, r4
; P8LE-NEXT: addze r6, r8
; P8LE-NEXT: mtfprd f0, r4
; P8LE-NEXT: slwi r4, r6, 15
; P8LE-NEXT: addze r6, r9
; P8LE-NEXT: slwi r6, r6, 15
; P8LE-NEXT: mtvsrd v3, r4
; P8LE-NEXT: sub r5, r7, r5
; P8LE-NEXT: sub r3, r3, r4
; P8LE-NEXT: mtfprd f1, r5
; P8LE-NEXT: xxswapd v2, vs0
; P8LE-NEXT: mtfprd f2, r3
; P8LE-NEXT: xxswapd v3, vs1
; P8LE-NEXT: xxswapd v4, vs2
; P8LE-NEXT: vmrglh v2, v2, v3
; P8LE-NEXT: vmrglh v3, v4, v5
; P8LE-NEXT: vmrglw v2, v2, v3
; P8LE-NEXT: sub r3, r3, r6
; P8LE-NEXT: mtvsrd v4, r5
; P8LE-NEXT: mtvsrd v5, r3
; P8LE-NEXT: vmrghh v3, v3, v4
; P8LE-NEXT: vmrghh v2, v5, v2
; P8LE-NEXT: vmrglw v2, v3, v2
; P8LE-NEXT: blr
;
; P8BE-LABEL: dont_fold_urem_i16_smax:

View File

@ -15,10 +15,10 @@ entry:
}
; CHECK-LABEL: @bar0
; CHECK-DAG: xxswapd 1, 1
; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
; CHECK-DAG: xxspltd [[REG2:[0-9]+]]
; CHECK: xxpermdi [[REG3:[0-9]+]], [[REG2]], [[REG1]], 1
; CHECK: stxvd2x [[REG3]]
; CHECK: xxmrgld [[REG2:[0-9]+]], 1, [[REG1]]
; CHECK: stxvd2x [[REG2]]
; CHECK-NOT: xxswapd
define void @bar1(double %y) {
@ -30,10 +30,10 @@ entry:
}
; CHECK-LABEL: @bar1
; CHECK-DAG: xxswapd 1, 1
; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
; CHECK-DAG: xxspltd [[REG2:[0-9]+]]
; CHECK: xxmrghd [[REG3:[0-9]+]], [[REG1]], [[REG2]]
; CHECK: stxvd2x [[REG3]]
; CHECK: xxpermdi [[REG2:[0-9]+]], [[REG1]], 1, 1
; CHECK: stxvd2x [[REG2]]
; CHECK-NOT: xxswapd
define void @baz0() {

View File

@ -27,7 +27,7 @@ define void @bar0() {
; CHECK: ld r3, .LC0@toc@l(r3)
; CHECK: addis r3, r2, .LC2@toc@ha
; CHECK: ld r3, .LC2@toc@l(r3)
; CHECK: xxpermdi vs0, vs0, vs1, 1
; CHECK: xxmrgld vs0, vs0, vs1
; CHECK: stxvd2x vs0, 0, r3
; CHECK: blr
;
@ -38,7 +38,7 @@ define void @bar0() {
; CHECK-P9-NOVECTOR: addis r3, r2, .LC1@toc@ha
; CHECK-P9-NOVECTOR: addis r3, r2, .LC2@toc@ha
; CHECK-P9-NOVECTOR: ld r3, .LC2@toc@l(r3)
; CHECK-P9-NOVECTOR: xxpermdi vs0, vs1, vs0, 1
; CHECK-P9-NOVECTOR: xxmrgld vs0, vs1, vs0
; CHECK-P9-NOVECTOR: stxvd2x vs0, 0, r3
; CHECK-P9-NOVECTOR: blr
;
@ -72,7 +72,7 @@ define void @bar1() {
; CHECK: ld r3, .LC0@toc@l(r3)
; CHECK: addis r3, r2, .LC2@toc@ha
; CHECK: ld r3, .LC2@toc@l(r3)
; CHECK: xxmrghd vs0, vs1, vs0
; CHECK: xxpermdi vs0, vs1, vs0, 1
; CHECK: stxvd2x vs0, 0, r3
; CHECK: blr
;
@ -83,7 +83,7 @@ define void @bar1() {
; CHECK-P9-NOVECTOR: addis r3, r2, .LC1@toc@ha
; CHECK-P9-NOVECTOR: addis r3, r2, .LC2@toc@ha
; CHECK-P9-NOVECTOR: ld r3, .LC2@toc@l(r3)
; CHECK-P9-NOVECTOR: xxmrghd vs0, vs0, vs1
; CHECK-P9-NOVECTOR: xxpermdi vs0, vs0, vs1, 1
; CHECK-P9-NOVECTOR: stxvd2x vs0, 0, r3
; CHECK-P9-NOVECTOR: blr
;

View File

@ -13,53 +13,50 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) {
; P9LE: # %bb.0:
; P9LE-NEXT: li r3, 4
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: lis r5, 21399
; P9LE-NEXT: ori r5, r5, 33437
; P9LE-NEXT: clrlwi r4, r3, 16
; P9LE-NEXT: mulhwu r4, r4, r5
; P9LE-NEXT: lis r5, 16727
; P9LE-NEXT: ori r5, r5, 2287
; P9LE-NEXT: lis r4, 21399
; P9LE-NEXT: ori r4, r4, 33437
; P9LE-NEXT: clrlwi r3, r3, 16
; P9LE-NEXT: mulhwu r4, r3, r4
; P9LE-NEXT: srwi r4, r4, 5
; P9LE-NEXT: mulli r4, r4, 98
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: mtfprd f0, r3
; P9LE-NEXT: lis r4, 16727
; P9LE-NEXT: mtvsrd v3, r3
; P9LE-NEXT: li r3, 6
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: clrlwi r4, r3, 16
; P9LE-NEXT: mulhwu r4, r4, r5
; P9LE-NEXT: lis r5, 8456
; P9LE-NEXT: ori r5, r5, 16913
; P9LE-NEXT: clrlwi r3, r3, 16
; P9LE-NEXT: ori r4, r4, 2287
; P9LE-NEXT: mulhwu r4, r3, r4
; P9LE-NEXT: srwi r4, r4, 8
; P9LE-NEXT: mulli r4, r4, 1003
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: xxswapd v3, vs0
; P9LE-NEXT: mtfprd f0, r3
; P9LE-NEXT: mtvsrd v4, r3
; P9LE-NEXT: li r3, 2
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: rlwinm r4, r3, 30, 18, 31
; P9LE-NEXT: mulhwu r4, r4, r5
; P9LE-NEXT: lis r5, 22765
; P9LE-NEXT: ori r5, r5, 8969
; P9LE-NEXT: srwi r4, r4, 2
; P9LE-NEXT: mulli r4, r4, 124
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: xxswapd v4, vs0
; P9LE-NEXT: mtfprd f0, r3
; P9LE-NEXT: lis r5, 8456
; P9LE-NEXT: ori r5, r5, 16913
; P9LE-NEXT: vmrghh v3, v4, v3
; P9LE-NEXT: clrlwi r4, r3, 16
; P9LE-NEXT: rlwinm r3, r3, 30, 18, 31
; P9LE-NEXT: mulhwu r3, r3, r5
; P9LE-NEXT: srwi r3, r3, 2
; P9LE-NEXT: mulli r3, r3, 124
; P9LE-NEXT: sub r3, r4, r3
; P9LE-NEXT: lis r4, 22765
; P9LE-NEXT: mtvsrd v4, r3
; P9LE-NEXT: li r3, 0
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: clrlwi r4, r3, 16
; P9LE-NEXT: mulhwu r5, r4, r5
; P9LE-NEXT: sub r4, r4, r5
; P9LE-NEXT: srwi r4, r4, 1
; P9LE-NEXT: add r4, r4, r5
; P9LE-NEXT: clrlwi r3, r3, 16
; P9LE-NEXT: ori r4, r4, 8969
; P9LE-NEXT: mulhwu r4, r3, r4
; P9LE-NEXT: sub r5, r3, r4
; P9LE-NEXT: srwi r5, r5, 1
; P9LE-NEXT: add r4, r5, r4
; P9LE-NEXT: srwi r4, r4, 6
; P9LE-NEXT: mulli r4, r4, 95
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: vmrglh v3, v4, v3
; P9LE-NEXT: xxswapd v4, vs0
; P9LE-NEXT: mtfprd f0, r3
; P9LE-NEXT: xxswapd v2, vs0
; P9LE-NEXT: vmrglh v2, v4, v2
; P9LE-NEXT: mtvsrd v2, r3
; P9LE-NEXT: vmrghh v2, v4, v2
; P9LE-NEXT: vmrglw v2, v3, v2
; P9LE-NEXT: blr
;
@ -123,50 +120,47 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) {
; P8LE-NEXT: xxswapd vs0, v2
; P8LE-NEXT: lis r3, 22765
; P8LE-NEXT: lis r7, 21399
; P8LE-NEXT: lis r10, 16727
; P8LE-NEXT: lis r9, 16727
; P8LE-NEXT: lis r10, 8456
; P8LE-NEXT: ori r3, r3, 8969
; P8LE-NEXT: ori r7, r7, 33437
; P8LE-NEXT: ori r10, r10, 2287
; P8LE-NEXT: ori r9, r9, 2287
; P8LE-NEXT: ori r10, r10, 16913
; P8LE-NEXT: mffprd r4, f0
; P8LE-NEXT: clrldi r6, r4, 48
; P8LE-NEXT: rldicl r5, r4, 32, 48
; P8LE-NEXT: clrlwi r9, r6, 16
; P8LE-NEXT: clrlwi r6, r6, 16
; P8LE-NEXT: rldicl r8, r4, 16, 48
; P8LE-NEXT: clrlwi r11, r5, 16
; P8LE-NEXT: mulhwu r3, r9, r3
; P8LE-NEXT: clrlwi r12, r8, 16
; P8LE-NEXT: mulhwu r7, r11, r7
; P8LE-NEXT: lis r11, 8456
; P8LE-NEXT: clrlwi r5, r5, 16
; P8LE-NEXT: mulhwu r3, r6, r3
; P8LE-NEXT: rldicl r4, r4, 48, 48
; P8LE-NEXT: mulhwu r10, r12, r10
; P8LE-NEXT: ori r11, r11, 16913
; P8LE-NEXT: rlwinm r12, r4, 30, 18, 31
; P8LE-NEXT: mulhwu r11, r12, r11
; P8LE-NEXT: sub r9, r9, r3
; P8LE-NEXT: srwi r9, r9, 1
; P8LE-NEXT: clrlwi r8, r8, 16
; P8LE-NEXT: rlwinm r11, r4, 30, 18, 31
; P8LE-NEXT: mulhwu r7, r5, r7
; P8LE-NEXT: clrlwi r4, r4, 16
; P8LE-NEXT: mulhwu r9, r8, r9
; P8LE-NEXT: mulhwu r10, r11, r10
; P8LE-NEXT: sub r11, r6, r3
; P8LE-NEXT: srwi r11, r11, 1
; P8LE-NEXT: srwi r7, r7, 5
; P8LE-NEXT: add r3, r9, r3
; P8LE-NEXT: srwi r9, r10, 8
; P8LE-NEXT: add r3, r11, r3
; P8LE-NEXT: srwi r9, r9, 8
; P8LE-NEXT: srwi r10, r10, 2
; P8LE-NEXT: srwi r3, r3, 6
; P8LE-NEXT: mulli r7, r7, 98
; P8LE-NEXT: srwi r10, r11, 2
; P8LE-NEXT: mulli r9, r9, 1003
; P8LE-NEXT: mulli r3, r3, 95
; P8LE-NEXT: mulli r10, r10, 124
; P8LE-NEXT: sub r5, r5, r7
; P8LE-NEXT: sub r7, r8, r9
; P8LE-NEXT: mtfprd f0, r5
; P8LE-NEXT: sub r3, r6, r3
; P8LE-NEXT: mtvsrd v2, r5
; P8LE-NEXT: sub r4, r4, r10
; P8LE-NEXT: mtfprd f1, r7
; P8LE-NEXT: mtfprd f2, r3
; P8LE-NEXT: xxswapd v2, vs0
; P8LE-NEXT: mtfprd f3, r4
; P8LE-NEXT: xxswapd v3, vs1
; P8LE-NEXT: xxswapd v4, vs2
; P8LE-NEXT: xxswapd v5, vs3
; P8LE-NEXT: vmrglh v2, v3, v2
; P8LE-NEXT: vmrglh v3, v5, v4
; P8LE-NEXT: mtvsrd v3, r7
; P8LE-NEXT: mtvsrd v4, r3
; P8LE-NEXT: mtvsrd v5, r4
; P8LE-NEXT: vmrghh v2, v3, v2
; P8LE-NEXT: vmrghh v3, v5, v4
; P8LE-NEXT: vmrglw v2, v2, v3
; P8LE-NEXT: blr
;
@ -230,56 +224,52 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) {
; P9LE: # %bb.0:
; P9LE-NEXT: li r3, 0
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: lis r5, 22765
; P9LE-NEXT: ori r5, r5, 8969
; P9LE-NEXT: clrlwi r4, r3, 16
; P9LE-NEXT: mulhwu r6, r4, r5
; P9LE-NEXT: sub r4, r4, r6
; P9LE-NEXT: srwi r4, r4, 1
; P9LE-NEXT: add r4, r4, r6
; P9LE-NEXT: srwi r4, r4, 6
; P9LE-NEXT: mulli r4, r4, 95
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: mtfprd f0, r3
; P9LE-NEXT: lis r4, 22765
; P9LE-NEXT: ori r4, r4, 8969
; P9LE-NEXT: clrlwi r3, r3, 16
; P9LE-NEXT: mulhwu r5, r3, r4
; P9LE-NEXT: sub r6, r3, r5
; P9LE-NEXT: srwi r6, r6, 1
; P9LE-NEXT: add r5, r6, r5
; P9LE-NEXT: srwi r5, r5, 6
; P9LE-NEXT: mulli r5, r5, 95
; P9LE-NEXT: sub r3, r3, r5
; P9LE-NEXT: mtvsrd v3, r3
; P9LE-NEXT: li r3, 2
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: clrlwi r4, r3, 16
; P9LE-NEXT: mulhwu r6, r4, r5
; P9LE-NEXT: sub r4, r4, r6
; P9LE-NEXT: srwi r4, r4, 1
; P9LE-NEXT: add r4, r4, r6
; P9LE-NEXT: srwi r4, r4, 6
; P9LE-NEXT: mulli r4, r4, 95
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: xxswapd v3, vs0
; P9LE-NEXT: mtfprd f0, r3
; P9LE-NEXT: clrlwi r3, r3, 16
; P9LE-NEXT: mulhwu r5, r3, r4
; P9LE-NEXT: sub r6, r3, r5
; P9LE-NEXT: srwi r6, r6, 1
; P9LE-NEXT: add r5, r6, r5
; P9LE-NEXT: srwi r5, r5, 6
; P9LE-NEXT: mulli r5, r5, 95
; P9LE-NEXT: sub r3, r3, r5
; P9LE-NEXT: mtvsrd v4, r3
; P9LE-NEXT: li r3, 4
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: clrlwi r4, r3, 16
; P9LE-NEXT: mulhwu r6, r4, r5
; P9LE-NEXT: sub r4, r4, r6
; P9LE-NEXT: srwi r4, r4, 1
; P9LE-NEXT: add r4, r4, r6
; P9LE-NEXT: srwi r4, r4, 6
; P9LE-NEXT: mulli r4, r4, 95
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: xxswapd v4, vs0
; P9LE-NEXT: mtfprd f0, r3
; P9LE-NEXT: clrlwi r3, r3, 16
; P9LE-NEXT: mulhwu r5, r3, r4
; P9LE-NEXT: sub r6, r3, r5
; P9LE-NEXT: srwi r6, r6, 1
; P9LE-NEXT: add r5, r6, r5
; P9LE-NEXT: srwi r5, r5, 6
; P9LE-NEXT: mulli r5, r5, 95
; P9LE-NEXT: sub r3, r3, r5
; P9LE-NEXT: vmrghh v3, v4, v3
; P9LE-NEXT: mtvsrd v4, r3
; P9LE-NEXT: li r3, 6
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: clrlwi r4, r3, 16
; P9LE-NEXT: mulhwu r5, r4, r5
; P9LE-NEXT: sub r4, r4, r5
; P9LE-NEXT: srwi r4, r4, 1
; P9LE-NEXT: add r4, r4, r5
; P9LE-NEXT: clrlwi r3, r3, 16
; P9LE-NEXT: mulhwu r4, r3, r4
; P9LE-NEXT: sub r5, r3, r4
; P9LE-NEXT: srwi r5, r5, 1
; P9LE-NEXT: add r4, r5, r4
; P9LE-NEXT: srwi r4, r4, 6
; P9LE-NEXT: mulli r4, r4, 95
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: vmrglh v3, v4, v3
; P9LE-NEXT: xxswapd v4, vs0
; P9LE-NEXT: mtfprd f0, r3
; P9LE-NEXT: xxswapd v2, vs0
; P9LE-NEXT: vmrglh v2, v2, v4
; P9LE-NEXT: mtvsrd v2, r3
; P9LE-NEXT: vmrghh v2, v2, v4
; P9LE-NEXT: vmrglw v2, v2, v3
; P9LE-NEXT: blr
;
@ -344,36 +334,34 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) {
; P8LE: # %bb.0:
; P8LE-NEXT: xxswapd vs0, v2
; P8LE-NEXT: lis r3, 22765
; P8LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; P8LE-NEXT: ori r3, r3, 8969
; P8LE-NEXT: mffprd r4, f0
; P8LE-NEXT: clrldi r5, r4, 48
; P8LE-NEXT: rldicl r6, r4, 48, 48
; P8LE-NEXT: clrlwi r8, r5, 16
; P8LE-NEXT: clrlwi r5, r5, 16
; P8LE-NEXT: rldicl r7, r4, 32, 48
; P8LE-NEXT: clrlwi r9, r6, 16
; P8LE-NEXT: clrlwi r6, r6, 16
; P8LE-NEXT: mulhwu r8, r5, r3
; P8LE-NEXT: rldicl r4, r4, 16, 48
; P8LE-NEXT: mulhwu r10, r8, r3
; P8LE-NEXT: clrlwi r11, r7, 16
; P8LE-NEXT: clrlwi r0, r4, 16
; P8LE-NEXT: mulhwu r12, r9, r3
; P8LE-NEXT: mulhwu r30, r11, r3
; P8LE-NEXT: mulhwu r3, r0, r3
; P8LE-NEXT: sub r8, r8, r10
; P8LE-NEXT: srwi r8, r8, 1
; P8LE-NEXT: sub r9, r9, r12
; P8LE-NEXT: add r8, r8, r10
; P8LE-NEXT: sub r10, r11, r30
; P8LE-NEXT: sub r11, r0, r3
; P8LE-NEXT: srwi r9, r9, 1
; P8LE-NEXT: srwi r10, r10, 1
; P8LE-NEXT: clrlwi r7, r7, 16
; P8LE-NEXT: mulhwu r9, r6, r3
; P8LE-NEXT: clrlwi r4, r4, 16
; P8LE-NEXT: mulhwu r10, r7, r3
; P8LE-NEXT: mulhwu r3, r4, r3
; P8LE-NEXT: sub r11, r5, r8
; P8LE-NEXT: sub r12, r6, r9
; P8LE-NEXT: srwi r11, r11, 1
; P8LE-NEXT: add r8, r11, r8
; P8LE-NEXT: sub r11, r7, r10
; P8LE-NEXT: srwi r12, r12, 1
; P8LE-NEXT: add r9, r12, r9
; P8LE-NEXT: sub r12, r4, r3
; P8LE-NEXT: srwi r11, r11, 1
; P8LE-NEXT: add r9, r9, r12
; P8LE-NEXT: srwi r8, r8, 6
; P8LE-NEXT: add r10, r10, r30
; P8LE-NEXT: add r3, r11, r3
; P8LE-NEXT: add r10, r11, r10
; P8LE-NEXT: srwi r11, r12, 1
; P8LE-NEXT: srwi r9, r9, 6
; P8LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; P8LE-NEXT: add r3, r11, r3
; P8LE-NEXT: mulli r8, r8, 95
; P8LE-NEXT: srwi r10, r10, 6
; P8LE-NEXT: srwi r3, r3, 6
@ -382,18 +370,14 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) {
; P8LE-NEXT: mulli r3, r3, 95
; P8LE-NEXT: sub r5, r5, r8
; P8LE-NEXT: sub r6, r6, r9
; P8LE-NEXT: mtfprd f0, r5
; P8LE-NEXT: mtvsrd v2, r5
; P8LE-NEXT: sub r5, r7, r10
; P8LE-NEXT: sub r3, r4, r3
; P8LE-NEXT: mtfprd f1, r6
; P8LE-NEXT: mtfprd f2, r5
; P8LE-NEXT: xxswapd v2, vs0
; P8LE-NEXT: mtfprd f3, r3
; P8LE-NEXT: xxswapd v3, vs1
; P8LE-NEXT: xxswapd v4, vs2
; P8LE-NEXT: xxswapd v5, vs3
; P8LE-NEXT: vmrglh v2, v3, v2
; P8LE-NEXT: vmrglh v3, v5, v4
; P8LE-NEXT: mtvsrd v3, r6
; P8LE-NEXT: mtvsrd v4, r5
; P8LE-NEXT: mtvsrd v5, r3
; P8LE-NEXT: vmrghh v2, v3, v2
; P8LE-NEXT: vmrghh v3, v5, v4
; P8LE-NEXT: vmrglw v2, v3, v2
; P8LE-NEXT: blr
;
@ -461,67 +445,59 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) {
; P9LE: # %bb.0:
; P9LE-NEXT: li r3, 0
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: lis r5, 22765
; P9LE-NEXT: ori r5, r5, 8969
; P9LE-NEXT: clrlwi r4, r3, 16
; P9LE-NEXT: mulhwu r6, r4, r5
; P9LE-NEXT: sub r4, r4, r6
; P9LE-NEXT: srwi r4, r4, 1
; P9LE-NEXT: add r4, r4, r6
; P9LE-NEXT: srwi r4, r4, 6
; P9LE-NEXT: mulli r6, r4, 95
; P9LE-NEXT: lis r4, 22765
; P9LE-NEXT: ori r4, r4, 8969
; P9LE-NEXT: clrlwi r3, r3, 16
; P9LE-NEXT: mulhwu r5, r3, r4
; P9LE-NEXT: sub r6, r3, r5
; P9LE-NEXT: srwi r6, r6, 1
; P9LE-NEXT: add r5, r6, r5
; P9LE-NEXT: srwi r5, r5, 6
; P9LE-NEXT: mulli r6, r5, 95
; P9LE-NEXT: sub r3, r3, r6
; P9LE-NEXT: mtfprd f0, r3
; P9LE-NEXT: mtvsrd v3, r3
; P9LE-NEXT: li r3, 2
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: clrlwi r6, r3, 16
; P9LE-NEXT: mulhwu r7, r6, r5
; P9LE-NEXT: mulhwu r7, r6, r4
; P9LE-NEXT: sub r6, r6, r7
; P9LE-NEXT: srwi r6, r6, 1
; P9LE-NEXT: add r6, r6, r7
; P9LE-NEXT: srwi r6, r6, 6
; P9LE-NEXT: mulli r7, r6, 95
; P9LE-NEXT: sub r3, r3, r7
; P9LE-NEXT: xxswapd v3, vs0
; P9LE-NEXT: mtfprd f0, r3
; P9LE-NEXT: mtvsrd v4, r3
; P9LE-NEXT: li r3, 4
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: clrlwi r7, r3, 16
; P9LE-NEXT: mulhwu r8, r7, r5
; P9LE-NEXT: mulhwu r8, r7, r4
; P9LE-NEXT: sub r7, r7, r8
; P9LE-NEXT: srwi r7, r7, 1
; P9LE-NEXT: add r7, r7, r8
; P9LE-NEXT: srwi r7, r7, 6
; P9LE-NEXT: mulli r8, r7, 95
; P9LE-NEXT: sub r3, r3, r8
; P9LE-NEXT: xxswapd v4, vs0
; P9LE-NEXT: mtfprd f0, r3
; P9LE-NEXT: vmrghh v3, v4, v3
; P9LE-NEXT: mtvsrd v4, r3
; P9LE-NEXT: li r3, 6
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: clrlwi r8, r3, 16
; P9LE-NEXT: mulhwu r5, r8, r5
; P9LE-NEXT: sub r8, r8, r5
; P9LE-NEXT: mulhwu r4, r8, r4
; P9LE-NEXT: sub r8, r8, r4
; P9LE-NEXT: srwi r8, r8, 1
; P9LE-NEXT: add r5, r8, r5
; P9LE-NEXT: srwi r5, r5, 6
; P9LE-NEXT: mulli r8, r5, 95
; P9LE-NEXT: add r4, r8, r4
; P9LE-NEXT: srwi r4, r4, 6
; P9LE-NEXT: mulli r8, r4, 95
; P9LE-NEXT: sub r3, r3, r8
; P9LE-NEXT: vmrglh v3, v4, v3
; P9LE-NEXT: xxswapd v4, vs0
; P9LE-NEXT: mtfprd f0, r3
; P9LE-NEXT: xxswapd v2, vs0
; P9LE-NEXT: mtfprd f0, r4
; P9LE-NEXT: vmrglh v2, v2, v4
; P9LE-NEXT: mtvsrd v2, r3
; P9LE-NEXT: vmrghh v2, v2, v4
; P9LE-NEXT: mtvsrd v4, r6
; P9LE-NEXT: vmrglw v2, v2, v3
; P9LE-NEXT: xxswapd v3, vs0
; P9LE-NEXT: mtfprd f0, r6
; P9LE-NEXT: xxswapd v4, vs0
; P9LE-NEXT: mtfprd f0, r7
; P9LE-NEXT: vmrglh v3, v4, v3
; P9LE-NEXT: xxswapd v4, vs0
; P9LE-NEXT: mtfprd f0, r5
; P9LE-NEXT: xxswapd v5, vs0
; P9LE-NEXT: vmrglh v4, v5, v4
; P9LE-NEXT: mtvsrd v3, r5
; P9LE-NEXT: vmrghh v3, v4, v3
; P9LE-NEXT: mtvsrd v4, r7
; P9LE-NEXT: mtvsrd v5, r4
; P9LE-NEXT: vmrghh v4, v5, v4
; P9LE-NEXT: vmrglw v3, v4, v3
; P9LE-NEXT: vadduhm v2, v2, v3
; P9LE-NEXT: blr
@ -598,69 +574,61 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) {
; P8LE-LABEL: combine_urem_udiv:
; P8LE: # %bb.0:
; P8LE-NEXT: xxswapd vs0, v2
; P8LE-NEXT: lis r4, 22765
; P8LE-NEXT: lis r3, 22765
; P8LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; P8LE-NEXT: ori r4, r4, 8969
; P8LE-NEXT: mffprd r5, f0
; P8LE-NEXT: clrldi r3, r5, 48
; P8LE-NEXT: rldicl r6, r5, 48, 48
; P8LE-NEXT: clrlwi r8, r3, 16
; P8LE-NEXT: rldicl r7, r5, 32, 48
; P8LE-NEXT: clrlwi r9, r6, 16
; P8LE-NEXT: mulhwu r10, r8, r4
; P8LE-NEXT: clrlwi r11, r7, 16
; P8LE-NEXT: rldicl r5, r5, 16, 48
; P8LE-NEXT: mulhwu r12, r9, r4
; P8LE-NEXT: mulhwu r0, r11, r4
; P8LE-NEXT: clrlwi r30, r5, 16
; P8LE-NEXT: mulhwu r4, r30, r4
; P8LE-NEXT: sub r8, r8, r10
; P8LE-NEXT: ori r3, r3, 8969
; P8LE-NEXT: mffprd r4, f0
; P8LE-NEXT: clrldi r5, r4, 48
; P8LE-NEXT: rldicl r6, r4, 48, 48
; P8LE-NEXT: clrlwi r5, r5, 16
; P8LE-NEXT: clrlwi r8, r6, 16
; P8LE-NEXT: rldicl r7, r4, 32, 48
; P8LE-NEXT: rldicl r4, r4, 16, 48
; P8LE-NEXT: mulhwu r9, r5, r3
; P8LE-NEXT: mulhwu r11, r8, r3
; P8LE-NEXT: clrlwi r10, r7, 16
; P8LE-NEXT: clrlwi r12, r4, 16
; P8LE-NEXT: mulhwu r0, r10, r3
; P8LE-NEXT: mulhwu r3, r12, r3
; P8LE-NEXT: sub r30, r5, r9
; P8LE-NEXT: sub r8, r8, r11
; P8LE-NEXT: srwi r30, r30, 1
; P8LE-NEXT: srwi r8, r8, 1
; P8LE-NEXT: sub r9, r9, r12
; P8LE-NEXT: add r8, r8, r10
; P8LE-NEXT: sub r10, r11, r0
; P8LE-NEXT: srwi r9, r9, 1
; P8LE-NEXT: sub r10, r10, r0
; P8LE-NEXT: add r9, r30, r9
; P8LE-NEXT: add r8, r8, r11
; P8LE-NEXT: sub r11, r12, r3
; P8LE-NEXT: srwi r10, r10, 1
; P8LE-NEXT: sub r11, r30, r4
; P8LE-NEXT: add r9, r9, r12
; P8LE-NEXT: srwi r8, r8, 6
; P8LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; P8LE-NEXT: add r10, r10, r0
; P8LE-NEXT: srwi r11, r11, 1
; P8LE-NEXT: srwi r9, r9, 6
; P8LE-NEXT: mtfprd f0, r8
; P8LE-NEXT: mulli r12, r8, 95
; P8LE-NEXT: srwi r11, r11, 1
; P8LE-NEXT: srwi r8, r8, 6
; P8LE-NEXT: add r10, r10, r0
; P8LE-NEXT: mulli r12, r9, 95
; P8LE-NEXT: add r3, r11, r3
; P8LE-NEXT: mtvsrd v2, r9
; P8LE-NEXT: srwi r10, r10, 6
; P8LE-NEXT: add r4, r11, r4
; P8LE-NEXT: mtfprd f1, r9
; P8LE-NEXT: mulli r8, r9, 95
; P8LE-NEXT: mulli r9, r10, 95
; P8LE-NEXT: srwi r4, r4, 6
; P8LE-NEXT: xxswapd v2, vs0
; P8LE-NEXT: mtfprd f2, r10
; P8LE-NEXT: mtfprd f3, r4
; P8LE-NEXT: mulli r4, r4, 95
; P8LE-NEXT: xxswapd v3, vs1
; P8LE-NEXT: xxswapd v1, vs2
; P8LE-NEXT: sub r3, r3, r12
; P8LE-NEXT: xxswapd v6, vs3
; P8LE-NEXT: mtfprd f0, r3
; P8LE-NEXT: sub r3, r7, r9
; P8LE-NEXT: sub r6, r6, r8
; P8LE-NEXT: mtfprd f4, r3
; P8LE-NEXT: sub r3, r5, r4
; P8LE-NEXT: mtfprd f1, r6
; P8LE-NEXT: mtfprd f5, r3
; P8LE-NEXT: xxswapd v5, vs4
; P8LE-NEXT: vmrglh v2, v3, v2
; P8LE-NEXT: xxswapd v3, vs0
; P8LE-NEXT: xxswapd v4, vs1
; P8LE-NEXT: xxswapd v0, vs5
; P8LE-NEXT: vmrglh v3, v4, v3
; P8LE-NEXT: vmrglh v4, v0, v5
; P8LE-NEXT: vmrglh v5, v6, v1
; P8LE-NEXT: vmrglw v3, v4, v3
; P8LE-NEXT: vmrglw v2, v5, v2
; P8LE-NEXT: mulli r9, r8, 95
; P8LE-NEXT: srwi r3, r3, 6
; P8LE-NEXT: mtvsrd v3, r8
; P8LE-NEXT: mulli r8, r10, 95
; P8LE-NEXT: mtvsrd v4, r10
; P8LE-NEXT: mulli r10, r3, 95
; P8LE-NEXT: vmrghh v2, v3, v2
; P8LE-NEXT: sub r5, r5, r12
; P8LE-NEXT: sub r6, r6, r9
; P8LE-NEXT: mtvsrd v3, r5
; P8LE-NEXT: mtvsrd v5, r6
; P8LE-NEXT: sub r5, r7, r8
; P8LE-NEXT: sub r4, r4, r10
; P8LE-NEXT: mtvsrd v0, r5
; P8LE-NEXT: mtvsrd v1, r4
; P8LE-NEXT: vmrghh v3, v5, v3
; P8LE-NEXT: mtvsrd v5, r3
; P8LE-NEXT: vmrghh v0, v1, v0
; P8LE-NEXT: vmrghh v4, v5, v4
; P8LE-NEXT: vmrglw v3, v0, v3
; P8LE-NEXT: vmrglw v2, v4, v2
; P8LE-NEXT: vadduhm v2, v3, v2
; P8LE-NEXT: blr
;
@ -742,34 +710,30 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) {
; P9LE-NEXT: li r3, 0
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: clrlwi r3, r3, 26
; P9LE-NEXT: mtfprd f0, r3
; P9LE-NEXT: mtvsrd v3, r3
; P9LE-NEXT: li r3, 2
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: clrlwi r3, r3, 27
; P9LE-NEXT: xxswapd v3, vs0
; P9LE-NEXT: mtfprd f0, r3
; P9LE-NEXT: mtvsrd v4, r3
; P9LE-NEXT: li r3, 6
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: lis r5, 22765
; P9LE-NEXT: ori r5, r5, 8969
; P9LE-NEXT: xxswapd v4, vs0
; P9LE-NEXT: clrlwi r4, r3, 16
; P9LE-NEXT: mulhwu r5, r4, r5
; P9LE-NEXT: sub r4, r4, r5
; P9LE-NEXT: srwi r4, r4, 1
; P9LE-NEXT: add r4, r4, r5
; P9LE-NEXT: lis r4, 22765
; P9LE-NEXT: ori r4, r4, 8969
; P9LE-NEXT: vmrghh v3, v4, v3
; P9LE-NEXT: clrlwi r3, r3, 16
; P9LE-NEXT: mulhwu r4, r3, r4
; P9LE-NEXT: sub r5, r3, r4
; P9LE-NEXT: srwi r5, r5, 1
; P9LE-NEXT: add r4, r5, r4
; P9LE-NEXT: srwi r4, r4, 6
; P9LE-NEXT: mulli r4, r4, 95
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: mtfprd f0, r3
; P9LE-NEXT: mtvsrd v4, r3
; P9LE-NEXT: li r3, 4
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: clrlwi r3, r3, 29
; P9LE-NEXT: vmrglh v3, v4, v3
; P9LE-NEXT: xxswapd v4, vs0
; P9LE-NEXT: mtfprd f0, r3
; P9LE-NEXT: xxswapd v2, vs0
; P9LE-NEXT: vmrglh v2, v4, v2
; P9LE-NEXT: mtvsrd v2, r3
; P9LE-NEXT: vmrghh v2, v4, v2
; P9LE-NEXT: vmrglw v2, v2, v3
; P9LE-NEXT: blr
;
@ -817,9 +781,9 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) {
; P8LE-NEXT: mffprd r4, f0
; P8LE-NEXT: rldicl r5, r4, 16, 48
; P8LE-NEXT: rldicl r7, r4, 48, 48
; P8LE-NEXT: clrlwi r6, r5, 16
; P8LE-NEXT: mulhwu r3, r6, r3
; P8LE-NEXT: sub r6, r6, r3
; P8LE-NEXT: clrlwi r5, r5, 16
; P8LE-NEXT: mulhwu r3, r5, r3
; P8LE-NEXT: sub r6, r5, r3
; P8LE-NEXT: srwi r6, r6, 1
; P8LE-NEXT: add r3, r6, r3
; P8LE-NEXT: clrldi r6, r4, 48
@ -827,19 +791,15 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) {
; P8LE-NEXT: clrlwi r6, r6, 26
; P8LE-NEXT: mulli r3, r3, 95
; P8LE-NEXT: rldicl r4, r4, 32, 48
; P8LE-NEXT: mtfprd f0, r6
; P8LE-NEXT: mtvsrd v2, r6
; P8LE-NEXT: clrlwi r6, r7, 27
; P8LE-NEXT: clrlwi r4, r4, 29
; P8LE-NEXT: mtfprd f1, r6
; P8LE-NEXT: mtfprd f3, r4
; P8LE-NEXT: xxswapd v2, vs0
; P8LE-NEXT: xxswapd v3, vs1
; P8LE-NEXT: mtvsrd v3, r6
; P8LE-NEXT: mtvsrd v5, r4
; P8LE-NEXT: vmrghh v2, v3, v2
; P8LE-NEXT: sub r3, r5, r3
; P8LE-NEXT: xxswapd v5, vs3
; P8LE-NEXT: mtfprd f2, r3
; P8LE-NEXT: vmrglh v2, v3, v2
; P8LE-NEXT: xxswapd v4, vs2
; P8LE-NEXT: vmrglh v3, v4, v5
; P8LE-NEXT: mtvsrd v4, r3
; P8LE-NEXT: vmrghh v3, v4, v5
; P8LE-NEXT: vmrglw v2, v3, v2
; P8LE-NEXT: blr
;
@ -885,40 +845,39 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) {
; P9LE: # %bb.0:
; P9LE-NEXT: li r3, 4
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: lis r5, -19946
; P9LE-NEXT: ori r5, r5, 17097
; P9LE-NEXT: clrlwi r4, r3, 16
; P9LE-NEXT: mulhwu r4, r4, r5
; P9LE-NEXT: lis r5, 24749
; P9LE-NEXT: ori r5, r5, 47143
; P9LE-NEXT: lis r4, -19946
; P9LE-NEXT: ori r4, r4, 17097
; P9LE-NEXT: clrlwi r3, r3, 16
; P9LE-NEXT: mulhwu r4, r3, r4
; P9LE-NEXT: srwi r4, r4, 4
; P9LE-NEXT: mulli r4, r4, 23
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: mtfprd f0, r3
; P9LE-NEXT: lis r4, 24749
; P9LE-NEXT: mtvsrd v3, r3
; P9LE-NEXT: li r3, 6
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: clrlwi r4, r3, 16
; P9LE-NEXT: mulhwu r4, r4, r5
; P9LE-NEXT: lis r5, -14230
; P9LE-NEXT: ori r5, r5, 30865
; P9LE-NEXT: clrlwi r3, r3, 16
; P9LE-NEXT: ori r4, r4, 47143
; P9LE-NEXT: mulhwu r4, r3, r4
; P9LE-NEXT: srwi r4, r4, 11
; P9LE-NEXT: mulli r4, r4, 5423
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: xxswapd v3, vs0
; P9LE-NEXT: mtfprd f0, r3
; P9LE-NEXT: mtvsrd v4, r3
; P9LE-NEXT: li r3, 2
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: rlwinm r4, r3, 31, 17, 31
; P9LE-NEXT: mulhwu r4, r4, r5
; P9LE-NEXT: srwi r4, r4, 8
; P9LE-NEXT: mulli r4, r4, 654
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: xxswapd v4, vs0
; P9LE-NEXT: mtfprd f0, r3
; P9LE-NEXT: xxswapd v2, vs0
; P9LE-NEXT: vmrglh v3, v4, v3
; P9LE-NEXT: xxlxor v4, v4, v4
; P9LE-NEXT: vmrglh v2, v2, v4
; P9LE-NEXT: lis r5, -14230
; P9LE-NEXT: ori r5, r5, 30865
; P9LE-NEXT: vmrghh v3, v4, v3
; P9LE-NEXT: clrlwi r4, r3, 16
; P9LE-NEXT: rlwinm r3, r3, 31, 17, 31
; P9LE-NEXT: mulhwu r3, r3, r5
; P9LE-NEXT: srwi r3, r3, 8
; P9LE-NEXT: mulli r3, r3, 654
; P9LE-NEXT: sub r3, r4, r3
; P9LE-NEXT: mtvsrd v2, r3
; P9LE-NEXT: li r3, 0
; P9LE-NEXT: mtvsrd v4, r3
; P9LE-NEXT: vmrghh v2, v2, v4
; P9LE-NEXT: vmrglw v2, v3, v2
; P9LE-NEXT: blr
;
@ -969,41 +928,40 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) {
; P8LE-LABEL: dont_fold_urem_one:
; P8LE: # %bb.0:
; P8LE-NEXT: xxswapd vs0, v2
; P8LE-NEXT: lis r3, -19946
; P8LE-NEXT: lis r7, 24749
; P8LE-NEXT: lis r9, -14230
; P8LE-NEXT: xxlxor v5, v5, v5
; P8LE-NEXT: ori r3, r3, 17097
; P8LE-NEXT: ori r7, r7, 47143
; P8LE-NEXT: ori r9, r9, 30865
; P8LE-NEXT: lis r3, -14230
; P8LE-NEXT: lis r7, -19946
; P8LE-NEXT: lis r9, 24749
; P8LE-NEXT: ori r3, r3, 30865
; P8LE-NEXT: ori r7, r7, 17097
; P8LE-NEXT: mffprd r4, f0
; P8LE-NEXT: rldicl r5, r4, 32, 48
; P8LE-NEXT: rldicl r6, r4, 16, 48
; P8LE-NEXT: clrlwi r8, r5, 16
; P8LE-NEXT: rldicl r4, r4, 48, 48
; P8LE-NEXT: rldicl r5, r4, 48, 48
; P8LE-NEXT: rldicl r6, r4, 32, 48
; P8LE-NEXT: rldicl r4, r4, 16, 48
; P8LE-NEXT: rlwinm r8, r5, 31, 17, 31
; P8LE-NEXT: clrlwi r6, r6, 16
; P8LE-NEXT: clrlwi r5, r5, 16
; P8LE-NEXT: mulhwu r3, r8, r3
; P8LE-NEXT: clrlwi r8, r6, 16
; P8LE-NEXT: mulhwu r7, r8, r7
; P8LE-NEXT: rlwinm r8, r4, 31, 17, 31
; P8LE-NEXT: mulhwu r8, r8, r9
; P8LE-NEXT: srwi r3, r3, 4
; P8LE-NEXT: srwi r7, r7, 11
; P8LE-NEXT: mulli r3, r3, 23
; P8LE-NEXT: srwi r8, r8, 8
; P8LE-NEXT: mulli r7, r7, 5423
; P8LE-NEXT: mulli r8, r8, 654
; P8LE-NEXT: ori r8, r9, 47143
; P8LE-NEXT: clrlwi r4, r4, 16
; P8LE-NEXT: li r9, 0
; P8LE-NEXT: mulhwu r7, r6, r7
; P8LE-NEXT: mulhwu r8, r4, r8
; P8LE-NEXT: mtvsrd v2, r9
; P8LE-NEXT: srwi r3, r3, 8
; P8LE-NEXT: srwi r7, r7, 4
; P8LE-NEXT: mulli r3, r3, 654
; P8LE-NEXT: srwi r8, r8, 11
; P8LE-NEXT: mulli r7, r7, 23
; P8LE-NEXT: mulli r8, r8, 5423
; P8LE-NEXT: sub r3, r5, r3
; P8LE-NEXT: sub r5, r6, r7
; P8LE-NEXT: mtfprd f0, r3
; P8LE-NEXT: mtvsrd v3, r3
; P8LE-NEXT: sub r3, r4, r8
; P8LE-NEXT: mtfprd f1, r5
; P8LE-NEXT: mtfprd f2, r3
; P8LE-NEXT: xxswapd v2, vs0
; P8LE-NEXT: xxswapd v3, vs1
; P8LE-NEXT: xxswapd v4, vs2
; P8LE-NEXT: vmrglh v2, v3, v2
; P8LE-NEXT: vmrglh v3, v4, v5
; P8LE-NEXT: vmrglw v2, v2, v3
; P8LE-NEXT: mtvsrd v4, r5
; P8LE-NEXT: mtvsrd v5, r3
; P8LE-NEXT: vmrghh v2, v3, v2
; P8LE-NEXT: vmrghh v3, v5, v4
; P8LE-NEXT: vmrglw v2, v3, v2
; P8LE-NEXT: blr
;
; P8BE-LABEL: dont_fold_urem_one:

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -16,12 +16,10 @@ define i64 @test2elt(<2 x double> %a) local_unnamed_addr #0 {
; CHECK-P8-NEXT: xscvdpuxws f1, v2
; CHECK-P8-NEXT: xscvdpuxws f0, f0
; CHECK-P8-NEXT: mffprwz r3, f1
; CHECK-P8-NEXT: mtvsrwz v2, r3
; CHECK-P8-NEXT: mffprwz r4, f0
; CHECK-P8-NEXT: mtfprd f0, r3
; CHECK-P8-NEXT: mtfprd f1, r4
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: vmrglw v2, v2, v3
; CHECK-P8-NEXT: mtvsrwz v3, r4
; CHECK-P8-NEXT: vmrghw v2, v2, v3
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: mffprd r3, f0
; CHECK-P8-NEXT: blr
@ -35,7 +33,7 @@ define i64 @test2elt(<2 x double> %a) local_unnamed_addr #0 {
; CHECK-P9-NEXT: xscvdpuxws f0, f0
; CHECK-P9-NEXT: mffprwz r3, f0
; CHECK-P9-NEXT: mtvsrws v2, r3
; CHECK-P9-NEXT: vmrglw v2, v3, v2
; CHECK-P9-NEXT: vmrghw v2, v3, v2
; CHECK-P9-NEXT: mfvsrld r3, v2
; CHECK-P9-NEXT: blr
;
@ -310,12 +308,10 @@ define i64 @test2elt_signed(<2 x double> %a) local_unnamed_addr #0 {
; CHECK-P8-NEXT: xscvdpsxws f1, v2
; CHECK-P8-NEXT: xscvdpsxws f0, f0
; CHECK-P8-NEXT: mffprwz r3, f1
; CHECK-P8-NEXT: mtvsrwz v2, r3
; CHECK-P8-NEXT: mffprwz r4, f0
; CHECK-P8-NEXT: mtfprd f0, r3
; CHECK-P8-NEXT: mtfprd f1, r4
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: vmrglw v2, v2, v3
; CHECK-P8-NEXT: mtvsrwz v3, r4
; CHECK-P8-NEXT: vmrghw v2, v2, v3
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: mffprd r3, f0
; CHECK-P8-NEXT: blr
@ -329,7 +325,7 @@ define i64 @test2elt_signed(<2 x double> %a) local_unnamed_addr #0 {
; CHECK-P9-NEXT: xscvdpsxws f0, f0
; CHECK-P9-NEXT: mffprwz r3, f0
; CHECK-P9-NEXT: mtvsrws v2, r3
; CHECK-P9-NEXT: vmrglw v2, v3, v2
; CHECK-P9-NEXT: vmrghw v2, v3, v2
; CHECK-P9-NEXT: mfvsrld r3, v2
; CHECK-P9-NEXT: blr
;

View File

@ -16,12 +16,10 @@ define i16 @test2elt(<2 x double> %a) local_unnamed_addr #0 {
; CHECK-P8-NEXT: xscvdpsxws f1, v2
; CHECK-P8-NEXT: xscvdpsxws f0, f0
; CHECK-P8-NEXT: mffprwz r3, f1
; CHECK-P8-NEXT: mtvsrd v2, r3
; CHECK-P8-NEXT: mffprwz r4, f0
; CHECK-P8-NEXT: mtfprd f0, r3
; CHECK-P8-NEXT: mtfprd f1, r4
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: vmrglb v2, v2, v3
; CHECK-P8-NEXT: mtvsrd v3, r4
; CHECK-P8-NEXT: vmrghb v2, v2, v3
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: mffprd r3, f0
; CHECK-P8-NEXT: clrldi r3, r3, 48
@ -33,15 +31,13 @@ define i16 @test2elt(<2 x double> %a) local_unnamed_addr #0 {
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: xscvdpsxws f0, v2
; CHECK-P9-NEXT: mffprwz r3, f0
; CHECK-P9-NEXT: mtfprd f0, r3
; CHECK-P9-NEXT: xxswapd v3, vs0
; CHECK-P9-NEXT: xxswapd vs0, v2
; CHECK-P9-NEXT: mtvsrd v3, r3
; CHECK-P9-NEXT: xscvdpsxws f0, f0
; CHECK-P9-NEXT: mffprwz r3, f0
; CHECK-P9-NEXT: mtfprd f0, r3
; CHECK-P9-NEXT: mtvsrd v2, r3
; CHECK-P9-NEXT: addi r3, r1, -2
; CHECK-P9-NEXT: xxswapd v2, vs0
; CHECK-P9-NEXT: vmrglb v2, v3, v2
; CHECK-P9-NEXT: vmrghb v2, v3, v2
; CHECK-P9-NEXT: vsldoi v2, v2, v2, 8
; CHECK-P9-NEXT: stxsihx v2, 0, r3
; CHECK-P9-NEXT: lhz r3, -2(r1)
@ -84,18 +80,14 @@ define i32 @test4elt(<4 x double>* nocapture readonly) local_unnamed_addr #1 {
; CHECK-P8-NEXT: xscvdpsxws f1, f1
; CHECK-P8-NEXT: mffprwz r3, f2
; CHECK-P8-NEXT: mffprwz r4, f3
; CHECK-P8-NEXT: mtfprd f2, r3
; CHECK-P8-NEXT: mtfprd f3, r4
; CHECK-P8-NEXT: mtvsrd v2, r3
; CHECK-P8-NEXT: mtvsrd v3, r4
; CHECK-P8-NEXT: mffprwz r3, f0
; CHECK-P8-NEXT: xxswapd v2, vs2
; CHECK-P8-NEXT: mffprwz r4, f1
; CHECK-P8-NEXT: xxswapd v4, vs3
; CHECK-P8-NEXT: mtfprd f0, r3
; CHECK-P8-NEXT: mtfprd f1, r4
; CHECK-P8-NEXT: xxswapd v3, vs0
; CHECK-P8-NEXT: xxswapd v5, vs1
; CHECK-P8-NEXT: vmrglb v2, v3, v2
; CHECK-P8-NEXT: vmrglb v3, v5, v4
; CHECK-P8-NEXT: mtvsrd v4, r3
; CHECK-P8-NEXT: mtvsrd v5, r4
; CHECK-P8-NEXT: vmrghb v2, v4, v2
; CHECK-P8-NEXT: vmrghb v3, v5, v3
; CHECK-P8-NEXT: vmrglh v2, v3, v2
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: mffprwz r3, f0
@ -109,24 +101,20 @@ define i32 @test4elt(<4 x double>* nocapture readonly) local_unnamed_addr #1 {
; CHECK-P9-NEXT: xscvdpsxws f1, f1
; CHECK-P9-NEXT: lxv vs0, 16(r3)
; CHECK-P9-NEXT: mffprwz r3, f2
; CHECK-P9-NEXT: mtfprd f2, r3
; CHECK-P9-NEXT: mtvsrd v2, r3
; CHECK-P9-NEXT: mffprwz r3, f1
; CHECK-P9-NEXT: xxswapd v2, vs2
; CHECK-P9-NEXT: mtfprd f1, r3
; CHECK-P9-NEXT: xxswapd v3, vs1
; CHECK-P9-NEXT: xscvdpsxws f1, f0
; CHECK-P9-NEXT: xxswapd vs0, vs0
; CHECK-P9-NEXT: mtvsrd v3, r3
; CHECK-P9-NEXT: xscvdpsxws f0, f0
; CHECK-P9-NEXT: vmrghb v2, v2, v3
; CHECK-P9-NEXT: mffprwz r3, f1
; CHECK-P9-NEXT: mtfprd f1, r3
; CHECK-P9-NEXT: mtvsrd v3, r3
; CHECK-P9-NEXT: mffprwz r3, f0
; CHECK-P9-NEXT: mtfprd f0, r3
; CHECK-P9-NEXT: vmrglb v2, v2, v3
; CHECK-P9-NEXT: xxswapd v3, vs1
; CHECK-P9-NEXT: xxswapd v4, vs0
; CHECK-P9-NEXT: vmrglb v3, v3, v4
; CHECK-P9-NEXT: vmrglh v2, v3, v2
; CHECK-P9-NEXT: mtvsrd v4, r3
; CHECK-P9-NEXT: li r3, 0
; CHECK-P9-NEXT: vmrghb v3, v3, v4
; CHECK-P9-NEXT: vmrglh v2, v3, v2
; CHECK-P9-NEXT: vextuwrx r3, r3, v2
; CHECK-P9-NEXT: blr
;
@ -185,36 +173,28 @@ define i64 @test8elt(<8 x double>* nocapture readonly) local_unnamed_addr #1 {
; CHECK-P8-NEXT: xxswapd vs3, vs3
; CHECK-P8-NEXT: xscvdpsxws f0, f0
; CHECK-P8-NEXT: xscvdpsxws f1, f1
; CHECK-P8-NEXT: mffprwz r3, f4
; CHECK-P8-NEXT: xscvdpsxws f2, f2
; CHECK-P8-NEXT: xscvdpsxws f3, f3
; CHECK-P8-NEXT: mffprwz r3, f4
; CHECK-P8-NEXT: mffprwz r4, f5
; CHECK-P8-NEXT: mtfprd f4, r3
; CHECK-P8-NEXT: mtvsrd v2, r3
; CHECK-P8-NEXT: mffprwz r3, f6
; CHECK-P8-NEXT: mtfprd f5, r4
; CHECK-P8-NEXT: xxswapd v2, vs4
; CHECK-P8-NEXT: mtvsrd v3, r4
; CHECK-P8-NEXT: mffprwz r4, f7
; CHECK-P8-NEXT: mtfprd f6, r3
; CHECK-P8-NEXT: xxswapd v3, vs5
; CHECK-P8-NEXT: mtvsrd v4, r3
; CHECK-P8-NEXT: mtvsrd v5, r4
; CHECK-P8-NEXT: mffprwz r3, f0
; CHECK-P8-NEXT: mtfprd f7, r4
; CHECK-P8-NEXT: xxswapd v4, vs6
; CHECK-P8-NEXT: mffprwz r4, f1
; CHECK-P8-NEXT: mtfprd f0, r3
; CHECK-P8-NEXT: xxswapd v1, vs7
; CHECK-P8-NEXT: mtvsrd v0, r3
; CHECK-P8-NEXT: mtvsrd v1, r4
; CHECK-P8-NEXT: mffprwz r3, f2
; CHECK-P8-NEXT: mtfprd f1, r4
; CHECK-P8-NEXT: xxswapd v5, vs0
; CHECK-P8-NEXT: mffprwz r4, f3
; CHECK-P8-NEXT: mtfprd f2, r3
; CHECK-P8-NEXT: xxswapd v0, vs1
; CHECK-P8-NEXT: mtfprd f0, r4
; CHECK-P8-NEXT: xxswapd v6, vs2
; CHECK-P8-NEXT: vmrglb v2, v5, v2
; CHECK-P8-NEXT: xxswapd v5, vs0
; CHECK-P8-NEXT: vmrglb v3, v0, v3
; CHECK-P8-NEXT: vmrglb v4, v6, v4
; CHECK-P8-NEXT: vmrglb v5, v5, v1
; CHECK-P8-NEXT: vmrghb v2, v0, v2
; CHECK-P8-NEXT: vmrghb v3, v1, v3
; CHECK-P8-NEXT: mtvsrd v0, r3
; CHECK-P8-NEXT: mtvsrd v1, r4
; CHECK-P8-NEXT: vmrghb v4, v0, v4
; CHECK-P8-NEXT: vmrghb v5, v1, v5
; CHECK-P8-NEXT: vmrglh v2, v3, v2
; CHECK-P8-NEXT: vmrglh v3, v5, v4
; CHECK-P8-NEXT: vmrglw v2, v3, v2
@ -228,47 +208,39 @@ define i64 @test8elt(<8 x double>* nocapture readonly) local_unnamed_addr #1 {
; CHECK-P9-NEXT: xscvdpsxws f4, f3
; CHECK-P9-NEXT: xxswapd vs3, vs3
; CHECK-P9-NEXT: xscvdpsxws f3, f3
; CHECK-P9-NEXT: lxv vs2, 16(r3)
; CHECK-P9-NEXT: lxv vs0, 48(r3)
; CHECK-P9-NEXT: lxv vs1, 32(r3)
; CHECK-P9-NEXT: lxv vs2, 16(r3)
; CHECK-P9-NEXT: mffprwz r3, f4
; CHECK-P9-NEXT: mtfprd f4, r3
; CHECK-P9-NEXT: mtvsrd v2, r3
; CHECK-P9-NEXT: mffprwz r3, f3
; CHECK-P9-NEXT: xxswapd v2, vs4
; CHECK-P9-NEXT: mtfprd f3, r3
; CHECK-P9-NEXT: xxswapd v3, vs3
; CHECK-P9-NEXT: xscvdpsxws f3, f2
; CHECK-P9-NEXT: xxswapd vs2, vs2
; CHECK-P9-NEXT: mtvsrd v3, r3
; CHECK-P9-NEXT: xscvdpsxws f2, f2
; CHECK-P9-NEXT: vmrghb v2, v2, v3
; CHECK-P9-NEXT: mffprwz r3, f3
; CHECK-P9-NEXT: mtfprd f3, r3
; CHECK-P9-NEXT: mtvsrd v3, r3
; CHECK-P9-NEXT: mffprwz r3, f2
; CHECK-P9-NEXT: mtfprd f2, r3
; CHECK-P9-NEXT: xxswapd v4, vs2
; CHECK-P9-NEXT: xscvdpsxws f2, f1
; CHECK-P9-NEXT: xxswapd vs1, vs1
; CHECK-P9-NEXT: mtvsrd v4, r3
; CHECK-P9-NEXT: xscvdpsxws f1, f1
; CHECK-P9-NEXT: vmrghb v3, v3, v4
; CHECK-P9-NEXT: mffprwz r3, f2
; CHECK-P9-NEXT: mtfprd f2, r3
; CHECK-P9-NEXT: mffprwz r3, f1
; CHECK-P9-NEXT: vmrglb v2, v2, v3
; CHECK-P9-NEXT: xxswapd v3, vs3
; CHECK-P9-NEXT: vmrglb v3, v3, v4
; CHECK-P9-NEXT: vmrglh v2, v3, v2
; CHECK-P9-NEXT: xxswapd v3, vs2
; CHECK-P9-NEXT: mtfprd f1, r3
; CHECK-P9-NEXT: xxswapd v4, vs1
; CHECK-P9-NEXT: mtvsrd v3, r3
; CHECK-P9-NEXT: mffprwz r3, f1
; CHECK-P9-NEXT: xscvdpsxws f1, f0
; CHECK-P9-NEXT: xxswapd vs0, vs0
; CHECK-P9-NEXT: mtvsrd v4, r3
; CHECK-P9-NEXT: xscvdpsxws f0, f0
; CHECK-P9-NEXT: vmrghb v3, v3, v4
; CHECK-P9-NEXT: mffprwz r3, f1
; CHECK-P9-NEXT: mtfprd f1, r3
; CHECK-P9-NEXT: mtvsrd v4, r3
; CHECK-P9-NEXT: mffprwz r3, f0
; CHECK-P9-NEXT: mtfprd f0, r3
; CHECK-P9-NEXT: vmrglb v3, v3, v4
; CHECK-P9-NEXT: xxswapd v4, vs1
; CHECK-P9-NEXT: xxswapd v5, vs0
; CHECK-P9-NEXT: vmrglb v4, v4, v5
; CHECK-P9-NEXT: mtvsrd v5, r3
; CHECK-P9-NEXT: vmrghb v4, v4, v5
; CHECK-P9-NEXT: vmrglh v3, v4, v3
; CHECK-P9-NEXT: vmrglw v2, v3, v2
; CHECK-P9-NEXT: mfvsrld r3, v2
@ -364,79 +336,63 @@ define <16 x i8> @test16elt(<16 x double>* nocapture readonly) local_unnamed_add
; CHECK-P8-NEXT: xxswapd vs7, vs7
; CHECK-P8-NEXT: xscvdpsxws v2, f9
; CHECK-P8-NEXT: xxswapd vs9, vs9
; CHECK-P8-NEXT: mffprwz r3, f4
; CHECK-P8-NEXT: xscvdpsxws v3, f11
; CHECK-P8-NEXT: xxswapd vs11, vs11
; CHECK-P8-NEXT: mffprwz r3, f4
; CHECK-P8-NEXT: mffprwz r4, f6
; CHECK-P8-NEXT: xscvdpsxws f0, f0
; CHECK-P8-NEXT: mtfprd f4, r3
; CHECK-P8-NEXT: mffprwz r3, f8
; CHECK-P8-NEXT: xscvdpsxws f1, f1
; CHECK-P8-NEXT: xxswapd v4, vs4
; CHECK-P8-NEXT: mtfprd f6, r4
; CHECK-P8-NEXT: mtvsrd v4, r3
; CHECK-P8-NEXT: mffprwz r3, f8
; CHECK-P8-NEXT: mtvsrd v5, r4
; CHECK-P8-NEXT: mffprwz r4, f10
; CHECK-P8-NEXT: xscvdpsxws f2, f2
; CHECK-P8-NEXT: xxswapd v5, vs6
; CHECK-P8-NEXT: mtfprd f8, r3
; CHECK-P8-NEXT: mffprwz r3, f12
; CHECK-P8-NEXT: xscvdpsxws f3, f3
; CHECK-P8-NEXT: xxswapd v0, vs8
; CHECK-P8-NEXT: mtfprd f10, r4
; CHECK-P8-NEXT: mtvsrd v0, r3
; CHECK-P8-NEXT: mffprwz r3, f12
; CHECK-P8-NEXT: mtvsrd v1, r4
; CHECK-P8-NEXT: mffprwz r4, f13
; CHECK-P8-NEXT: xscvdpsxws f5, f5
; CHECK-P8-NEXT: xxswapd v1, vs10
; CHECK-P8-NEXT: mtfprd f12, r3
; CHECK-P8-NEXT: mfvsrwz r3, v2
; CHECK-P8-NEXT: xscvdpsxws f7, f7
; CHECK-P8-NEXT: xxswapd v6, vs12
; CHECK-P8-NEXT: mtfprd f13, r4
; CHECK-P8-NEXT: mtvsrd v6, r3
; CHECK-P8-NEXT: mfvsrwz r3, v2
; CHECK-P8-NEXT: mtvsrd v2, r4
; CHECK-P8-NEXT: mfvsrwz r4, v3
; CHECK-P8-NEXT: mtvsrd v2, r3
; CHECK-P8-NEXT: xxswapd v7, vs13
; CHECK-P8-NEXT: mffprwz r3, f0
; CHECK-P8-NEXT: xscvdpsxws f9, f9
; CHECK-P8-NEXT: xxswapd v2, v2
; CHECK-P8-NEXT: xscvdpsxws f11, f11
; CHECK-P8-NEXT: mtvsrd v3, r4
; CHECK-P8-NEXT: mtvsrd v3, r3
; CHECK-P8-NEXT: mtvsrd v7, r4
; CHECK-P8-NEXT: mffprwz r3, f0
; CHECK-P8-NEXT: mffprwz r4, f1
; CHECK-P8-NEXT: mtfprd f0, r3
; CHECK-P8-NEXT: xxswapd v3, v3
; CHECK-P8-NEXT: mtvsrd v8, r3
; CHECK-P8-NEXT: mtvsrd v9, r4
; CHECK-P8-NEXT: mffprwz r3, f2
; CHECK-P8-NEXT: mtfprd f1, r4
; CHECK-P8-NEXT: xxswapd v8, vs0
; CHECK-P8-NEXT: mffprwz r4, f3
; CHECK-P8-NEXT: mtfprd f2, r3
; CHECK-P8-NEXT: xxswapd v9, vs1
; CHECK-P8-NEXT: vmrghb v4, v8, v4
; CHECK-P8-NEXT: vmrghb v5, v9, v5
; CHECK-P8-NEXT: mtvsrd v8, r3
; CHECK-P8-NEXT: mtvsrd v9, r4
; CHECK-P8-NEXT: mffprwz r3, f5
; CHECK-P8-NEXT: mtfprd f3, r4
; CHECK-P8-NEXT: xxswapd v10, vs2
; CHECK-P8-NEXT: mffprwz r4, f7
; CHECK-P8-NEXT: mtfprd f5, r3
; CHECK-P8-NEXT: vmrghb v0, v8, v0
; CHECK-P8-NEXT: vmrghb v1, v9, v1
; CHECK-P8-NEXT: mtvsrd v8, r3
; CHECK-P8-NEXT: mtvsrd v9, r4
; CHECK-P8-NEXT: mffprwz r3, f9
; CHECK-P8-NEXT: mtfprd f7, r4
; CHECK-P8-NEXT: mffprwz r4, f11
; CHECK-P8-NEXT: vmrglb v4, v8, v4
; CHECK-P8-NEXT: xxswapd v8, vs3
; CHECK-P8-NEXT: vmrglb v5, v9, v5
; CHECK-P8-NEXT: xxswapd v9, vs5
; CHECK-P8-NEXT: mtfprd f0, r3
; CHECK-P8-NEXT: mtfprd f1, r4
; CHECK-P8-NEXT: vmrglb v0, v10, v0
; CHECK-P8-NEXT: xxswapd v10, vs7
; CHECK-P8-NEXT: vmrglb v1, v8, v1
; CHECK-P8-NEXT: xxswapd v8, vs0
; CHECK-P8-NEXT: vmrglb v6, v9, v6
; CHECK-P8-NEXT: xxswapd v9, vs1
; CHECK-P8-NEXT: vmrglb v7, v10, v7
; CHECK-P8-NEXT: vmrglb v2, v8, v2
; CHECK-P8-NEXT: vmrglb v3, v9, v3
; CHECK-P8-NEXT: vmrghb v6, v8, v6
; CHECK-P8-NEXT: vmrghb v2, v9, v2
; CHECK-P8-NEXT: mtvsrd v8, r3
; CHECK-P8-NEXT: mtvsrd v9, r4
; CHECK-P8-NEXT: vmrghb v3, v8, v3
; CHECK-P8-NEXT: vmrghb v7, v9, v7
; CHECK-P8-NEXT: vmrglh v4, v5, v4
; CHECK-P8-NEXT: vmrglh v5, v1, v0
; CHECK-P8-NEXT: vmrglh v0, v7, v6
; CHECK-P8-NEXT: vmrglh v2, v3, v2
; CHECK-P8-NEXT: vmrglw v3, v5, v4
; CHECK-P8-NEXT: vmrglw v2, v2, v0
; CHECK-P8-NEXT: xxmrgld v2, v2, v3
; CHECK-P8-NEXT: vmrglh v2, v2, v6
; CHECK-P8-NEXT: vmrglh v3, v7, v3
; CHECK-P8-NEXT: vmrglw v4, v5, v4
; CHECK-P8-NEXT: vmrglw v2, v3, v2
; CHECK-P8-NEXT: xxmrgld v2, v2, v4
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt:
@ -445,94 +401,78 @@ define <16 x i8> @test16elt(<16 x double>* nocapture readonly) local_unnamed_add
; CHECK-P9-NEXT: xscvdpsxws f8, f7
; CHECK-P9-NEXT: xxswapd vs7, vs7
; CHECK-P9-NEXT: xscvdpsxws f7, f7
; CHECK-P9-NEXT: lxv vs6, 16(r3)
; CHECK-P9-NEXT: lxv vs0, 112(r3)
; CHECK-P9-NEXT: lxv vs1, 96(r3)
; CHECK-P9-NEXT: lxv vs2, 80(r3)
; CHECK-P9-NEXT: lxv vs3, 64(r3)
; CHECK-P9-NEXT: lxv vs4, 48(r3)
; CHECK-P9-NEXT: lxv vs5, 32(r3)
; CHECK-P9-NEXT: lxv vs6, 16(r3)
; CHECK-P9-NEXT: mffprwz r3, f8
; CHECK-P9-NEXT: mtfprd f8, r3
; CHECK-P9-NEXT: mtvsrd v2, r3
; CHECK-P9-NEXT: mffprwz r3, f7
; CHECK-P9-NEXT: xxswapd v2, vs8
; CHECK-P9-NEXT: mtfprd f7, r3
; CHECK-P9-NEXT: xxswapd v3, vs7
; CHECK-P9-NEXT: xscvdpsxws f7, f6
; CHECK-P9-NEXT: xxswapd vs6, vs6
; CHECK-P9-NEXT: mtvsrd v3, r3
; CHECK-P9-NEXT: xscvdpsxws f6, f6
; CHECK-P9-NEXT: vmrghb v2, v2, v3
; CHECK-P9-NEXT: mffprwz r3, f7
; CHECK-P9-NEXT: mtfprd f7, r3
; CHECK-P9-NEXT: mtvsrd v3, r3
; CHECK-P9-NEXT: mffprwz r3, f6
; CHECK-P9-NEXT: mtfprd f6, r3
; CHECK-P9-NEXT: xxswapd v4, vs6
; CHECK-P9-NEXT: xscvdpsxws f6, f5
; CHECK-P9-NEXT: xxswapd vs5, vs5
; CHECK-P9-NEXT: mtvsrd v4, r3
; CHECK-P9-NEXT: xscvdpsxws f5, f5
; CHECK-P9-NEXT: vmrghb v3, v3, v4
; CHECK-P9-NEXT: mffprwz r3, f6
; CHECK-P9-NEXT: mtfprd f6, r3
; CHECK-P9-NEXT: mffprwz r3, f5
; CHECK-P9-NEXT: vmrglb v2, v2, v3
; CHECK-P9-NEXT: xxswapd v3, vs7
; CHECK-P9-NEXT: vmrglb v3, v3, v4
; CHECK-P9-NEXT: vmrglh v2, v3, v2
; CHECK-P9-NEXT: xxswapd v3, vs6
; CHECK-P9-NEXT: mtfprd f5, r3
; CHECK-P9-NEXT: xxswapd v4, vs5
; CHECK-P9-NEXT: mtvsrd v3, r3
; CHECK-P9-NEXT: mffprwz r3, f5
; CHECK-P9-NEXT: xscvdpsxws f5, f4
; CHECK-P9-NEXT: xxswapd vs4, vs4
; CHECK-P9-NEXT: mtvsrd v4, r3
; CHECK-P9-NEXT: xscvdpsxws f4, f4
; CHECK-P9-NEXT: vmrghb v3, v3, v4
; CHECK-P9-NEXT: mffprwz r3, f5
; CHECK-P9-NEXT: mtfprd f5, r3
; CHECK-P9-NEXT: mtvsrd v4, r3
; CHECK-P9-NEXT: mffprwz r3, f4
; CHECK-P9-NEXT: mtfprd f4, r3
; CHECK-P9-NEXT: xxswapd v5, vs4
; CHECK-P9-NEXT: xscvdpsxws f4, f3
; CHECK-P9-NEXT: xxswapd vs3, vs3
; CHECK-P9-NEXT: mtvsrd v5, r3
; CHECK-P9-NEXT: xscvdpsxws f3, f3
; CHECK-P9-NEXT: vmrglb v3, v3, v4
; CHECK-P9-NEXT: xxswapd v4, vs5
; CHECK-P9-NEXT: vmrglb v4, v4, v5
; CHECK-P9-NEXT: vmrghb v4, v4, v5
; CHECK-P9-NEXT: vmrglh v3, v4, v3
; CHECK-P9-NEXT: mffprwz r3, f4
; CHECK-P9-NEXT: mtfprd f4, r3
; CHECK-P9-NEXT: vmrglw v2, v3, v2
; CHECK-P9-NEXT: mtvsrd v3, r3
; CHECK-P9-NEXT: mffprwz r3, f3
; CHECK-P9-NEXT: mtfprd f3, r3
; CHECK-P9-NEXT: xxswapd v4, vs3
; CHECK-P9-NEXT: xscvdpsxws f3, f2
; CHECK-P9-NEXT: xxswapd vs2, vs2
; CHECK-P9-NEXT: mtvsrd v4, r3
; CHECK-P9-NEXT: xscvdpsxws f2, f2
; CHECK-P9-NEXT: vmrghb v3, v3, v4
; CHECK-P9-NEXT: mffprwz r3, f3
; CHECK-P9-NEXT: mtfprd f3, r3
; CHECK-P9-NEXT: mtvsrd v4, r3
; CHECK-P9-NEXT: mffprwz r3, f2
; CHECK-P9-NEXT: mtfprd f2, r3
; CHECK-P9-NEXT: xxswapd v5, vs2
; CHECK-P9-NEXT: xscvdpsxws f2, f1
; CHECK-P9-NEXT: xxswapd vs1, vs1
; CHECK-P9-NEXT: mtvsrd v5, r3
; CHECK-P9-NEXT: xscvdpsxws f1, f1
; CHECK-P9-NEXT: vmrglw v2, v3, v2
; CHECK-P9-NEXT: xxswapd v3, vs4
; CHECK-P9-NEXT: vmrglb v3, v3, v4
; CHECK-P9-NEXT: xxswapd v4, vs3
; CHECK-P9-NEXT: vmrglb v4, v4, v5
; CHECK-P9-NEXT: vmrglh v3, v4, v3
; CHECK-P9-NEXT: vmrghb v4, v4, v5
; CHECK-P9-NEXT: mffprwz r3, f2
; CHECK-P9-NEXT: mtfprd f2, r3
; CHECK-P9-NEXT: vmrglh v3, v4, v3
; CHECK-P9-NEXT: mtvsrd v4, r3
; CHECK-P9-NEXT: mffprwz r3, f1
; CHECK-P9-NEXT: xxswapd v4, vs2
; CHECK-P9-NEXT: mtfprd f1, r3
; CHECK-P9-NEXT: xxswapd v5, vs1
; CHECK-P9-NEXT: xscvdpsxws f1, f0
; CHECK-P9-NEXT: xxswapd vs0, vs0
; CHECK-P9-NEXT: mtvsrd v5, r3
; CHECK-P9-NEXT: xscvdpsxws f0, f0
; CHECK-P9-NEXT: vmrghb v4, v4, v5
; CHECK-P9-NEXT: mffprwz r3, f1
; CHECK-P9-NEXT: mtfprd f1, r3
; CHECK-P9-NEXT: mtvsrd v5, r3
; CHECK-P9-NEXT: mffprwz r3, f0
; CHECK-P9-NEXT: mtfprd f0, r3
; CHECK-P9-NEXT: vmrglb v4, v4, v5
; CHECK-P9-NEXT: xxswapd v5, vs1
; CHECK-P9-NEXT: xxswapd v0, vs0
; CHECK-P9-NEXT: vmrglb v5, v5, v0
; CHECK-P9-NEXT: mtvsrd v0, r3
; CHECK-P9-NEXT: vmrghb v5, v5, v0
; CHECK-P9-NEXT: vmrglh v4, v5, v4
; CHECK-P9-NEXT: vmrglw v3, v4, v3
; CHECK-P9-NEXT: xxmrgld v2, v3, v2
@ -649,12 +589,10 @@ define i16 @test2elt_signed(<2 x double> %a) local_unnamed_addr #0 {
; CHECK-P8-NEXT: xscvdpsxws f1, v2
; CHECK-P8-NEXT: xscvdpsxws f0, f0
; CHECK-P8-NEXT: mffprwz r3, f1
; CHECK-P8-NEXT: mtvsrd v2, r3
; CHECK-P8-NEXT: mffprwz r4, f0
; CHECK-P8-NEXT: mtfprd f0, r3
; CHECK-P8-NEXT: mtfprd f1, r4
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: vmrglb v2, v2, v3
; CHECK-P8-NEXT: mtvsrd v3, r4
; CHECK-P8-NEXT: vmrghb v2, v2, v3
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: mffprd r3, f0
; CHECK-P8-NEXT: clrldi r3, r3, 48
@ -666,15 +604,13 @@ define i16 @test2elt_signed(<2 x double> %a) local_unnamed_addr #0 {
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: xscvdpsxws f0, v2
; CHECK-P9-NEXT: mffprwz r3, f0
; CHECK-P9-NEXT: mtfprd f0, r3
; CHECK-P9-NEXT: xxswapd v3, vs0
; CHECK-P9-NEXT: xxswapd vs0, v2
; CHECK-P9-NEXT: mtvsrd v3, r3
; CHECK-P9-NEXT: xscvdpsxws f0, f0
; CHECK-P9-NEXT: mffprwz r3, f0
; CHECK-P9-NEXT: mtfprd f0, r3
; CHECK-P9-NEXT: mtvsrd v2, r3
; CHECK-P9-NEXT: addi r3, r1, -2
; CHECK-P9-NEXT: xxswapd v2, vs0
; CHECK-P9-NEXT: vmrglb v2, v3, v2
; CHECK-P9-NEXT: vmrghb v2, v3, v2
; CHECK-P9-NEXT: vsldoi v2, v2, v2, 8
; CHECK-P9-NEXT: stxsihx v2, 0, r3
; CHECK-P9-NEXT: lhz r3, -2(r1)
@ -717,18 +653,14 @@ define i32 @test4elt_signed(<4 x double>* nocapture readonly) local_unnamed_addr
; CHECK-P8-NEXT: xscvdpsxws f1, f1
; CHECK-P8-NEXT: mffprwz r3, f2
; CHECK-P8-NEXT: mffprwz r4, f3
; CHECK-P8-NEXT: mtfprd f2, r3
; CHECK-P8-NEXT: mtfprd f3, r4
; CHECK-P8-NEXT: mtvsrd v2, r3
; CHECK-P8-NEXT: mtvsrd v3, r4
; CHECK-P8-NEXT: mffprwz r3, f0
; CHECK-P8-NEXT: xxswapd v2, vs2
; CHECK-P8-NEXT: mffprwz r4, f1
; CHECK-P8-NEXT: xxswapd v4, vs3
; CHECK-P8-NEXT: mtfprd f0, r3
; CHECK-P8-NEXT: mtfprd f1, r4
; CHECK-P8-NEXT: xxswapd v3, vs0
; CHECK-P8-NEXT: xxswapd v5, vs1
; CHECK-P8-NEXT: vmrglb v2, v3, v2
; CHECK-P8-NEXT: vmrglb v3, v5, v4
; CHECK-P8-NEXT: mtvsrd v4, r3
; CHECK-P8-NEXT: mtvsrd v5, r4
; CHECK-P8-NEXT: vmrghb v2, v4, v2
; CHECK-P8-NEXT: vmrghb v3, v5, v3
; CHECK-P8-NEXT: vmrglh v2, v3, v2
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: mffprwz r3, f0
@ -742,24 +674,20 @@ define i32 @test4elt_signed(<4 x double>* nocapture readonly) local_unnamed_addr
; CHECK-P9-NEXT: xscvdpsxws f1, f1
; CHECK-P9-NEXT: lxv vs0, 16(r3)
; CHECK-P9-NEXT: mffprwz r3, f2
; CHECK-P9-NEXT: mtfprd f2, r3
; CHECK-P9-NEXT: mtvsrd v2, r3
; CHECK-P9-NEXT: mffprwz r3, f1
; CHECK-P9-NEXT: xxswapd v2, vs2
; CHECK-P9-NEXT: mtfprd f1, r3
; CHECK-P9-NEXT: xxswapd v3, vs1
; CHECK-P9-NEXT: xscvdpsxws f1, f0
; CHECK-P9-NEXT: xxswapd vs0, vs0
; CHECK-P9-NEXT: mtvsrd v3, r3
; CHECK-P9-NEXT: xscvdpsxws f0, f0
; CHECK-P9-NEXT: vmrghb v2, v2, v3
; CHECK-P9-NEXT: mffprwz r3, f1
; CHECK-P9-NEXT: mtfprd f1, r3
; CHECK-P9-NEXT: mtvsrd v3, r3
; CHECK-P9-NEXT: mffprwz r3, f0
; CHECK-P9-NEXT: mtfprd f0, r3
; CHECK-P9-NEXT: vmrglb v2, v2, v3
; CHECK-P9-NEXT: xxswapd v3, vs1
; CHECK-P9-NEXT: xxswapd v4, vs0
; CHECK-P9-NEXT: vmrglb v3, v3, v4
; CHECK-P9-NEXT: vmrglh v2, v3, v2
; CHECK-P9-NEXT: mtvsrd v4, r3
; CHECK-P9-NEXT: li r3, 0
; CHECK-P9-NEXT: vmrghb v3, v3, v4
; CHECK-P9-NEXT: vmrglh v2, v3, v2
; CHECK-P9-NEXT: vextuwrx r3, r3, v2
; CHECK-P9-NEXT: blr
;
@ -818,36 +746,28 @@ define i64 @test8elt_signed(<8 x double>* nocapture readonly) local_unnamed_addr
; CHECK-P8-NEXT: xxswapd vs3, vs3
; CHECK-P8-NEXT: xscvdpsxws f0, f0
; CHECK-P8-NEXT: xscvdpsxws f1, f1
; CHECK-P8-NEXT: mffprwz r3, f4
; CHECK-P8-NEXT: xscvdpsxws f2, f2
; CHECK-P8-NEXT: xscvdpsxws f3, f3
; CHECK-P8-NEXT: mffprwz r3, f4
; CHECK-P8-NEXT: mffprwz r4, f5
; CHECK-P8-NEXT: mtfprd f4, r3
; CHECK-P8-NEXT: mtvsrd v2, r3
; CHECK-P8-NEXT: mffprwz r3, f6
; CHECK-P8-NEXT: mtfprd f5, r4
; CHECK-P8-NEXT: xxswapd v2, vs4
; CHECK-P8-NEXT: mtvsrd v3, r4
; CHECK-P8-NEXT: mffprwz r4, f7
; CHECK-P8-NEXT: mtfprd f6, r3
; CHECK-P8-NEXT: xxswapd v3, vs5
; CHECK-P8-NEXT: mtvsrd v4, r3
; CHECK-P8-NEXT: mtvsrd v5, r4
; CHECK-P8-NEXT: mffprwz r3, f0
; CHECK-P8-NEXT: mtfprd f7, r4
; CHECK-P8-NEXT: xxswapd v4, vs6
; CHECK-P8-NEXT: mffprwz r4, f1
; CHECK-P8-NEXT: mtfprd f0, r3
; CHECK-P8-NEXT: xxswapd v1, vs7
; CHECK-P8-NEXT: mtvsrd v0, r3
; CHECK-P8-NEXT: mtvsrd v1, r4
; CHECK-P8-NEXT: mffprwz r3, f2
; CHECK-P8-NEXT: mtfprd f1, r4
; CHECK-P8-NEXT: xxswapd v5, vs0
; CHECK-P8-NEXT: mffprwz r4, f3
; CHECK-P8-NEXT: mtfprd f2, r3
; CHECK-P8-NEXT: xxswapd v0, vs1
; CHECK-P8-NEXT: mtfprd f0, r4
; CHECK-P8-NEXT: xxswapd v6, vs2
; CHECK-P8-NEXT: vmrglb v2, v5, v2
; CHECK-P8-NEXT: xxswapd v5, vs0
; CHECK-P8-NEXT: vmrglb v3, v0, v3
; CHECK-P8-NEXT: vmrglb v4, v6, v4
; CHECK-P8-NEXT: vmrglb v5, v5, v1
; CHECK-P8-NEXT: vmrghb v2, v0, v2
; CHECK-P8-NEXT: vmrghb v3, v1, v3
; CHECK-P8-NEXT: mtvsrd v0, r3
; CHECK-P8-NEXT: mtvsrd v1, r4
; CHECK-P8-NEXT: vmrghb v4, v0, v4
; CHECK-P8-NEXT: vmrghb v5, v1, v5
; CHECK-P8-NEXT: vmrglh v2, v3, v2
; CHECK-P8-NEXT: vmrglh v3, v5, v4
; CHECK-P8-NEXT: vmrglw v2, v3, v2
@ -861,47 +781,39 @@ define i64 @test8elt_signed(<8 x double>* nocapture readonly) local_unnamed_addr
; CHECK-P9-NEXT: xscvdpsxws f4, f3
; CHECK-P9-NEXT: xxswapd vs3, vs3
; CHECK-P9-NEXT: xscvdpsxws f3, f3
; CHECK-P9-NEXT: lxv vs2, 16(r3)
; CHECK-P9-NEXT: lxv vs0, 48(r3)
; CHECK-P9-NEXT: lxv vs1, 32(r3)
; CHECK-P9-NEXT: lxv vs2, 16(r3)
; CHECK-P9-NEXT: mffprwz r3, f4
; CHECK-P9-NEXT: mtfprd f4, r3
; CHECK-P9-NEXT: mtvsrd v2, r3
; CHECK-P9-NEXT: mffprwz r3, f3
; CHECK-P9-NEXT: xxswapd v2, vs4
; CHECK-P9-NEXT: mtfprd f3, r3
; CHECK-P9-NEXT: xxswapd v3, vs3
; CHECK-P9-NEXT: xscvdpsxws f3, f2
; CHECK-P9-NEXT: xxswapd vs2, vs2
; CHECK-P9-NEXT: mtvsrd v3, r3
; CHECK-P9-NEXT: xscvdpsxws f2, f2
; CHECK-P9-NEXT: vmrghb v2, v2, v3
; CHECK-P9-NEXT: mffprwz r3, f3
; CHECK-P9-NEXT: mtfprd f3, r3
; CHECK-P9-NEXT: mtvsrd v3, r3
; CHECK-P9-NEXT: mffprwz r3, f2
; CHECK-P9-NEXT: mtfprd f2, r3
; CHECK-P9-NEXT: xxswapd v4, vs2
; CHECK-P9-NEXT: xscvdpsxws f2, f1
; CHECK-P9-NEXT: xxswapd vs1, vs1
; CHECK-P9-NEXT: mtvsrd v4, r3
; CHECK-P9-NEXT: xscvdpsxws f1, f1
; CHECK-P9-NEXT: vmrghb v3, v3, v4
; CHECK-P9-NEXT: mffprwz r3, f2
; CHECK-P9-NEXT: mtfprd f2, r3
; CHECK-P9-NEXT: mffprwz r3, f1
; CHECK-P9-NEXT: vmrglb v2, v2, v3
; CHECK-P9-NEXT: xxswapd v3, vs3
; CHECK-P9-NEXT: vmrglb v3, v3, v4
; CHECK-P9-NEXT: vmrglh v2, v3, v2
; CHECK-P9-NEXT: xxswapd v3, vs2
; CHECK-P9-NEXT: mtfprd f1, r3
; CHECK-P9-NEXT: xxswapd v4, vs1
; CHECK-P9-NEXT: mtvsrd v3, r3
; CHECK-P9-NEXT: mffprwz r3, f1
; CHECK-P9-NEXT: xscvdpsxws f1, f0
; CHECK-P9-NEXT: xxswapd vs0, vs0
; CHECK-P9-NEXT: mtvsrd v4, r3
; CHECK-P9-NEXT: xscvdpsxws f0, f0
; CHECK-P9-NEXT: vmrghb v3, v3, v4
; CHECK-P9-NEXT: mffprwz r3, f1
; CHECK-P9-NEXT: mtfprd f1, r3
; CHECK-P9-NEXT: mtvsrd v4, r3
; CHECK-P9-NEXT: mffprwz r3, f0
; CHECK-P9-NEXT: mtfprd f0, r3
; CHECK-P9-NEXT: vmrglb v3, v3, v4
; CHECK-P9-NEXT: xxswapd v4, vs1
; CHECK-P9-NEXT: xxswapd v5, vs0
; CHECK-P9-NEXT: vmrglb v4, v4, v5
; CHECK-P9-NEXT: mtvsrd v5, r3
; CHECK-P9-NEXT: vmrghb v4, v4, v5
; CHECK-P9-NEXT: vmrglh v3, v4, v3
; CHECK-P9-NEXT: vmrglw v2, v3, v2
; CHECK-P9-NEXT: mfvsrld r3, v2
@ -997,79 +909,63 @@ define <16 x i8> @test16elt_signed(<16 x double>* nocapture readonly) local_unna
; CHECK-P8-NEXT: xxswapd vs7, vs7
; CHECK-P8-NEXT: xscvdpsxws v2, f9
; CHECK-P8-NEXT: xxswapd vs9, vs9
; CHECK-P8-NEXT: mffprwz r3, f4
; CHECK-P8-NEXT: xscvdpsxws v3, f11
; CHECK-P8-NEXT: xxswapd vs11, vs11
; CHECK-P8-NEXT: mffprwz r3, f4
; CHECK-P8-NEXT: mffprwz r4, f6
; CHECK-P8-NEXT: xscvdpsxws f0, f0
; CHECK-P8-NEXT: mtfprd f4, r3
; CHECK-P8-NEXT: mffprwz r3, f8
; CHECK-P8-NEXT: xscvdpsxws f1, f1
; CHECK-P8-NEXT: xxswapd v4, vs4
; CHECK-P8-NEXT: mtfprd f6, r4
; CHECK-P8-NEXT: mtvsrd v4, r3
; CHECK-P8-NEXT: mffprwz r3, f8
; CHECK-P8-NEXT: mtvsrd v5, r4
; CHECK-P8-NEXT: mffprwz r4, f10
; CHECK-P8-NEXT: xscvdpsxws f2, f2
; CHECK-P8-NEXT: xxswapd v5, vs6
; CHECK-P8-NEXT: mtfprd f8, r3
; CHECK-P8-NEXT: mffprwz r3, f12
; CHECK-P8-NEXT: xscvdpsxws f3, f3
; CHECK-P8-NEXT: xxswapd v0, vs8
; CHECK-P8-NEXT: mtfprd f10, r4
; CHECK-P8-NEXT: mtvsrd v0, r3
; CHECK-P8-NEXT: mffprwz r3, f12
; CHECK-P8-NEXT: mtvsrd v1, r4
; CHECK-P8-NEXT: mffprwz r4, f13
; CHECK-P8-NEXT: xscvdpsxws f5, f5
; CHECK-P8-NEXT: xxswapd v1, vs10
; CHECK-P8-NEXT: mtfprd f12, r3
; CHECK-P8-NEXT: mfvsrwz r3, v2
; CHECK-P8-NEXT: xscvdpsxws f7, f7
; CHECK-P8-NEXT: xxswapd v6, vs12
; CHECK-P8-NEXT: mtfprd f13, r4
; CHECK-P8-NEXT: mtvsrd v6, r3
; CHECK-P8-NEXT: mfvsrwz r3, v2
; CHECK-P8-NEXT: mtvsrd v2, r4
; CHECK-P8-NEXT: mfvsrwz r4, v3
; CHECK-P8-NEXT: mtvsrd v2, r3
; CHECK-P8-NEXT: xxswapd v7, vs13
; CHECK-P8-NEXT: mffprwz r3, f0
; CHECK-P8-NEXT: xscvdpsxws f9, f9
; CHECK-P8-NEXT: xxswapd v2, v2
; CHECK-P8-NEXT: xscvdpsxws f11, f11
; CHECK-P8-NEXT: mtvsrd v3, r4
; CHECK-P8-NEXT: mtvsrd v3, r3
; CHECK-P8-NEXT: mtvsrd v7, r4
; CHECK-P8-NEXT: mffprwz r3, f0
; CHECK-P8-NEXT: mffprwz r4, f1
; CHECK-P8-NEXT: mtfprd f0, r3
; CHECK-P8-NEXT: xxswapd v3, v3
; CHECK-P8-NEXT: mtvsrd v8, r3
; CHECK-P8-NEXT: mtvsrd v9, r4
; CHECK-P8-NEXT: mffprwz r3, f2
; CHECK-P8-NEXT: mtfprd f1, r4
; CHECK-P8-NEXT: xxswapd v8, vs0
; CHECK-P8-NEXT: mffprwz r4, f3
; CHECK-P8-NEXT: mtfprd f2, r3
; CHECK-P8-NEXT: xxswapd v9, vs1
; CHECK-P8-NEXT: vmrghb v4, v8, v4
; CHECK-P8-NEXT: vmrghb v5, v9, v5
; CHECK-P8-NEXT: mtvsrd v8, r3
; CHECK-P8-NEXT: mtvsrd v9, r4
; CHECK-P8-NEXT: mffprwz r3, f5
; CHECK-P8-NEXT: mtfprd f3, r4
; CHECK-P8-NEXT: xxswapd v10, vs2
; CHECK-P8-NEXT: mffprwz r4, f7
; CHECK-P8-NEXT: mtfprd f5, r3
; CHECK-P8-NEXT: vmrghb v0, v8, v0
; CHECK-P8-NEXT: vmrghb v1, v9, v1
; CHECK-P8-NEXT: mtvsrd v8, r3
; CHECK-P8-NEXT: mtvsrd v9, r4
; CHECK-P8-NEXT: mffprwz r3, f9
; CHECK-P8-NEXT: mtfprd f7, r4
; CHECK-P8-NEXT: mffprwz r4, f11
; CHECK-P8-NEXT: vmrglb v4, v8, v4
; CHECK-P8-NEXT: xxswapd v8, vs3
; CHECK-P8-NEXT: vmrglb v5, v9, v5
; CHECK-P8-NEXT: xxswapd v9, vs5
; CHECK-P8-NEXT: mtfprd f0, r3
; CHECK-P8-NEXT: mtfprd f1, r4
; CHECK-P8-NEXT: vmrglb v0, v10, v0
; CHECK-P8-NEXT: xxswapd v10, vs7
; CHECK-P8-NEXT: vmrglb v1, v8, v1
; CHECK-P8-NEXT: xxswapd v8, vs0
; CHECK-P8-NEXT: vmrglb v6, v9, v6
; CHECK-P8-NEXT: xxswapd v9, vs1
; CHECK-P8-NEXT: vmrglb v7, v10, v7
; CHECK-P8-NEXT: vmrglb v2, v8, v2
; CHECK-P8-NEXT: vmrglb v3, v9, v3
; CHECK-P8-NEXT: vmrghb v6, v8, v6
; CHECK-P8-NEXT: vmrghb v2, v9, v2
; CHECK-P8-NEXT: mtvsrd v8, r3
; CHECK-P8-NEXT: mtvsrd v9, r4
; CHECK-P8-NEXT: vmrghb v3, v8, v3
; CHECK-P8-NEXT: vmrghb v7, v9, v7
; CHECK-P8-NEXT: vmrglh v4, v5, v4
; CHECK-P8-NEXT: vmrglh v5, v1, v0
; CHECK-P8-NEXT: vmrglh v0, v7, v6
; CHECK-P8-NEXT: vmrglh v2, v3, v2
; CHECK-P8-NEXT: vmrglw v3, v5, v4
; CHECK-P8-NEXT: vmrglw v2, v2, v0
; CHECK-P8-NEXT: xxmrgld v2, v2, v3
; CHECK-P8-NEXT: vmrglh v2, v2, v6
; CHECK-P8-NEXT: vmrglh v3, v7, v3
; CHECK-P8-NEXT: vmrglw v4, v5, v4
; CHECK-P8-NEXT: vmrglw v2, v3, v2
; CHECK-P8-NEXT: xxmrgld v2, v2, v4
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt_signed:
@ -1078,94 +974,78 @@ define <16 x i8> @test16elt_signed(<16 x double>* nocapture readonly) local_unna
; CHECK-P9-NEXT: xscvdpsxws f8, f7
; CHECK-P9-NEXT: xxswapd vs7, vs7
; CHECK-P9-NEXT: xscvdpsxws f7, f7
; CHECK-P9-NEXT: lxv vs6, 16(r3)
; CHECK-P9-NEXT: lxv vs0, 112(r3)
; CHECK-P9-NEXT: lxv vs1, 96(r3)
; CHECK-P9-NEXT: lxv vs2, 80(r3)
; CHECK-P9-NEXT: lxv vs3, 64(r3)
; CHECK-P9-NEXT: lxv vs4, 48(r3)
; CHECK-P9-NEXT: lxv vs5, 32(r3)
; CHECK-P9-NEXT: lxv vs6, 16(r3)
; CHECK-P9-NEXT: mffprwz r3, f8
; CHECK-P9-NEXT: mtfprd f8, r3
; CHECK-P9-NEXT: mtvsrd v2, r3
; CHECK-P9-NEXT: mffprwz r3, f7
; CHECK-P9-NEXT: xxswapd v2, vs8
; CHECK-P9-NEXT: mtfprd f7, r3
; CHECK-P9-NEXT: xxswapd v3, vs7
; CHECK-P9-NEXT: xscvdpsxws f7, f6
; CHECK-P9-NEXT: xxswapd vs6, vs6
; CHECK-P9-NEXT: mtvsrd v3, r3
; CHECK-P9-NEXT: xscvdpsxws f6, f6
; CHECK-P9-NEXT: vmrghb v2, v2, v3
; CHECK-P9-NEXT: mffprwz r3, f7
; CHECK-P9-NEXT: mtfprd f7, r3
; CHECK-P9-NEXT: mtvsrd v3, r3
; CHECK-P9-NEXT: mffprwz r3, f6
; CHECK-P9-NEXT: mtfprd f6, r3
; CHECK-P9-NEXT: xxswapd v4, vs6
; CHECK-P9-NEXT: xscvdpsxws f6, f5
; CHECK-P9-NEXT: xxswapd vs5, vs5
; CHECK-P9-NEXT: mtvsrd v4, r3
; CHECK-P9-NEXT: xscvdpsxws f5, f5
; CHECK-P9-NEXT: vmrghb v3, v3, v4
; CHECK-P9-NEXT: mffprwz r3, f6
; CHECK-P9-NEXT: mtfprd f6, r3
; CHECK-P9-NEXT: mffprwz r3, f5
; CHECK-P9-NEXT: vmrglb v2, v2, v3
; CHECK-P9-NEXT: xxswapd v3, vs7
; CHECK-P9-NEXT: vmrglb v3, v3, v4
; CHECK-P9-NEXT: vmrglh v2, v3, v2
; CHECK-P9-NEXT: xxswapd v3, vs6
; CHECK-P9-NEXT: mtfprd f5, r3
; CHECK-P9-NEXT: xxswapd v4, vs5
; CHECK-P9-NEXT: mtvsrd v3, r3
; CHECK-P9-NEXT: mffprwz r3, f5
; CHECK-P9-NEXT: xscvdpsxws f5, f4
; CHECK-P9-NEXT: xxswapd vs4, vs4
; CHECK-P9-NEXT: mtvsrd v4, r3
; CHECK-P9-NEXT: xscvdpsxws f4, f4
; CHECK-P9-NEXT: vmrghb v3, v3, v4
; CHECK-P9-NEXT: mffprwz r3, f5
; CHECK-P9-NEXT: mtfprd f5, r3
; CHECK-P9-NEXT: mtvsrd v4, r3
; CHECK-P9-NEXT: mffprwz r3, f4
; CHECK-P9-NEXT: mtfprd f4, r3
; CHECK-P9-NEXT: xxswapd v5, vs4
; CHECK-P9-NEXT: xscvdpsxws f4, f3
; CHECK-P9-NEXT: xxswapd vs3, vs3
; CHECK-P9-NEXT: mtvsrd v5, r3
; CHECK-P9-NEXT: xscvdpsxws f3, f3
; CHECK-P9-NEXT: vmrglb v3, v3, v4
; CHECK-P9-NEXT: xxswapd v4, vs5
; CHECK-P9-NEXT: vmrglb v4, v4, v5
; CHECK-P9-NEXT: vmrghb v4, v4, v5
; CHECK-P9-NEXT: vmrglh v3, v4, v3
; CHECK-P9-NEXT: mffprwz r3, f4
; CHECK-P9-NEXT: mtfprd f4, r3
; CHECK-P9-NEXT: vmrglw v2, v3, v2
; CHECK-P9-NEXT: mtvsrd v3, r3
; CHECK-P9-NEXT: mffprwz r3, f3
; CHECK-P9-NEXT: mtfprd f3, r3
; CHECK-P9-NEXT: xxswapd v4, vs3
; CHECK-P9-NEXT: xscvdpsxws f3, f2
; CHECK-P9-NEXT: xxswapd vs2, vs2
; CHECK-P9-NEXT: mtvsrd v4, r3
; CHECK-P9-NEXT: xscvdpsxws f2, f2
; CHECK-P9-NEXT: vmrghb v3, v3, v4
; CHECK-P9-NEXT: mffprwz r3, f3
; CHECK-P9-NEXT: mtfprd f3, r3
; CHECK-P9-NEXT: mtvsrd v4, r3
; CHECK-P9-NEXT: mffprwz r3, f2
; CHECK-P9-NEXT: mtfprd f2, r3
; CHECK-P9-NEXT: xxswapd v5, vs2
; CHECK-P9-NEXT: xscvdpsxws f2, f1
; CHECK-P9-NEXT: xxswapd vs1, vs1
; CHECK-P9-NEXT: mtvsrd v5, r3
; CHECK-P9-NEXT: xscvdpsxws f1, f1
; CHECK-P9-NEXT: vmrglw v2, v3, v2
; CHECK-P9-NEXT: xxswapd v3, vs4
; CHECK-P9-NEXT: vmrglb v3, v3, v4
; CHECK-P9-NEXT: xxswapd v4, vs3
; CHECK-P9-NEXT: vmrglb v4, v4, v5
; CHECK-P9-NEXT: vmrglh v3, v4, v3
; CHECK-P9-NEXT: vmrghb v4, v4, v5
; CHECK-P9-NEXT: mffprwz r3, f2
; CHECK-P9-NEXT: mtfprd f2, r3
; CHECK-P9-NEXT: vmrglh v3, v4, v3
; CHECK-P9-NEXT: mtvsrd v4, r3
; CHECK-P9-NEXT: mffprwz r3, f1
; CHECK-P9-NEXT: xxswapd v4, vs2
; CHECK-P9-NEXT: mtfprd f1, r3
; CHECK-P9-NEXT: xxswapd v5, vs1
; CHECK-P9-NEXT: xscvdpsxws f1, f0
; CHECK-P9-NEXT: xxswapd vs0, vs0
; CHECK-P9-NEXT: mtvsrd v5, r3
; CHECK-P9-NEXT: xscvdpsxws f0, f0
; CHECK-P9-NEXT: vmrghb v4, v4, v5
; CHECK-P9-NEXT: mffprwz r3, f1
; CHECK-P9-NEXT: mtfprd f1, r3
; CHECK-P9-NEXT: mtvsrd v5, r3
; CHECK-P9-NEXT: mffprwz r3, f0
; CHECK-P9-NEXT: mtfprd f0, r3
; CHECK-P9-NEXT: vmrglb v4, v4, v5
; CHECK-P9-NEXT: xxswapd v5, vs1
; CHECK-P9-NEXT: xxswapd v0, vs0
; CHECK-P9-NEXT: vmrglb v5, v5, v0
; CHECK-P9-NEXT: mtvsrd v0, r3
; CHECK-P9-NEXT: vmrghb v5, v5, v0
; CHECK-P9-NEXT: vmrglh v4, v5, v4
; CHECK-P9-NEXT: vmrglw v3, v4, v3
; CHECK-P9-NEXT: xxmrgld v2, v3, v2

View File

@ -24,9 +24,9 @@ define i64 @test2elt(i32 %a.coerce) local_unnamed_addr #0 {
; CHECK-P8-NEXT: xscvuxdsp f1, f1
; CHECK-P8-NEXT: xscvdpspn vs0, f0
; CHECK-P8-NEXT: xscvdpspn vs1, f1
; CHECK-P8-NEXT: xxsldwi v2, vs0, vs0, 1
; CHECK-P8-NEXT: xxsldwi v3, vs1, vs1, 1
; CHECK-P8-NEXT: vmrglw v2, v3, v2
; CHECK-P8-NEXT: xxsldwi v2, vs0, vs0, 3
; CHECK-P8-NEXT: xxsldwi v3, vs1, vs1, 3
; CHECK-P8-NEXT: vmrghw v2, v3, v2
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: mffprd r3, f0
; CHECK-P8-NEXT: blr
@ -43,12 +43,12 @@ define i64 @test2elt(i32 %a.coerce) local_unnamed_addr #0 {
; CHECK-P9-NEXT: xscvdpspn vs0, f0
; CHECK-P9-NEXT: vextuhrx r3, r3, v2
; CHECK-P9-NEXT: clrlwi r3, r3, 16
; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 1
; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 3
; CHECK-P9-NEXT: mtfprwz f0, r3
; CHECK-P9-NEXT: xscvuxdsp f0, f0
; CHECK-P9-NEXT: xscvdpspn vs0, f0
; CHECK-P9-NEXT: xxsldwi v2, vs0, vs0, 1
; CHECK-P9-NEXT: vmrglw v2, v2, v3
; CHECK-P9-NEXT: xxsldwi v2, vs0, vs0, 3
; CHECK-P9-NEXT: vmrghw v2, v2, v3
; CHECK-P9-NEXT: mfvsrld r3, v2
; CHECK-P9-NEXT: blr
;
@ -80,25 +80,17 @@ entry:
define <4 x float> @test4elt(i64 %a.coerce) local_unnamed_addr #1 {
; CHECK-P8-LABEL: test4elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r4, r2, .LCPI1_0@toc@ha
; CHECK-P8-NEXT: mtfprd f0, r3
; CHECK-P8-NEXT: addi r3, r4, .LCPI1_0@toc@l
; CHECK-P8-NEXT: xxlxor v4, v4, v4
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: lvx v3, 0, r3
; CHECK-P8-NEXT: vperm v2, v4, v2, v3
; CHECK-P8-NEXT: xxlxor v2, v2, v2
; CHECK-P8-NEXT: mtvsrd v3, r3
; CHECK-P8-NEXT: vmrghh v2, v2, v3
; CHECK-P8-NEXT: xvcvuxwsp v2, v2
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test4elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: mtfprd f0, r3
; CHECK-P9-NEXT: addis r3, r2, .LCPI1_0@toc@ha
; CHECK-P9-NEXT: addi r3, r3, .LCPI1_0@toc@l
; CHECK-P9-NEXT: lxvx v3, 0, r3
; CHECK-P9-NEXT: xxswapd v2, vs0
; CHECK-P9-NEXT: xxlxor v4, v4, v4
; CHECK-P9-NEXT: vperm v2, v4, v2, v3
; CHECK-P9-NEXT: mtvsrd v2, r3
; CHECK-P9-NEXT: xxlxor v3, v3, v3
; CHECK-P9-NEXT: vmrghh v2, v3, v2
; CHECK-P9-NEXT: xvcvuxwsp v2, v2
; CHECK-P9-NEXT: blr
;
@ -121,17 +113,11 @@ entry:
define void @test8elt(<8 x float>* noalias nocapture sret %agg.result, <8 x i16> %a) local_unnamed_addr #2 {
; CHECK-P8-LABEL: test8elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r4, r2, .LCPI2_0@toc@ha
; CHECK-P8-NEXT: addis r5, r2, .LCPI2_1@toc@ha
; CHECK-P8-NEXT: xxlxor v4, v4, v4
; CHECK-P8-NEXT: addi r4, r4, .LCPI2_0@toc@l
; CHECK-P8-NEXT: lvx v3, 0, r4
; CHECK-P8-NEXT: addi r4, r5, .LCPI2_1@toc@l
; CHECK-P8-NEXT: lvx v5, 0, r4
; CHECK-P8-NEXT: xxlxor v3, v3, v3
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: vperm v3, v4, v2, v3
; CHECK-P8-NEXT: vperm v2, v4, v2, v5
; CHECK-P8-NEXT: xvcvuxwsp v3, v3
; CHECK-P8-NEXT: vmrglh v4, v3, v2
; CHECK-P8-NEXT: vmrghh v2, v3, v2
; CHECK-P8-NEXT: xvcvuxwsp v3, v4
; CHECK-P8-NEXT: xvcvuxwsp v2, v2
; CHECK-P8-NEXT: stvx v3, 0, r3
; CHECK-P8-NEXT: stvx v2, r3, r4
@ -139,19 +125,13 @@ define void @test8elt(<8 x float>* noalias nocapture sret %agg.result, <8 x i16>
;
; CHECK-P9-LABEL: test8elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: addis r4, r2, .LCPI2_0@toc@ha
; CHECK-P9-NEXT: addi r4, r4, .LCPI2_0@toc@l
; CHECK-P9-NEXT: lxvx v3, 0, r4
; CHECK-P9-NEXT: xxlxor v4, v4, v4
; CHECK-P9-NEXT: addis r4, r2, .LCPI2_1@toc@ha
; CHECK-P9-NEXT: addi r4, r4, .LCPI2_1@toc@l
; CHECK-P9-NEXT: vperm v3, v4, v2, v3
; CHECK-P9-NEXT: xvcvuxwsp vs0, v3
; CHECK-P9-NEXT: lxvx v3, 0, r4
; CHECK-P9-NEXT: vperm v2, v4, v2, v3
; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: xxlxor v3, v3, v3
; CHECK-P9-NEXT: vmrglh v4, v3, v2
; CHECK-P9-NEXT: vmrghh v2, v3, v2
; CHECK-P9-NEXT: xvcvuxwsp vs0, v4
; CHECK-P9-NEXT: xvcvuxwsp vs1, v2
; CHECK-P9-NEXT: stxv vs1, 16(r3)
; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt:
@ -276,9 +256,9 @@ define i64 @test2elt_signed(i32 %a.coerce) local_unnamed_addr #0 {
; CHECK-P8-NEXT: xscvsxdsp f1, f1
; CHECK-P8-NEXT: xscvdpspn vs0, f0
; CHECK-P8-NEXT: xscvdpspn vs1, f1
; CHECK-P8-NEXT: xxsldwi v2, vs0, vs0, 1
; CHECK-P8-NEXT: xxsldwi v3, vs1, vs1, 1
; CHECK-P8-NEXT: vmrglw v2, v3, v2
; CHECK-P8-NEXT: xxsldwi v2, vs0, vs0, 3
; CHECK-P8-NEXT: xxsldwi v3, vs1, vs1, 3
; CHECK-P8-NEXT: vmrghw v2, v3, v2
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: mffprd r3, f0
; CHECK-P8-NEXT: blr
@ -295,12 +275,12 @@ define i64 @test2elt_signed(i32 %a.coerce) local_unnamed_addr #0 {
; CHECK-P9-NEXT: xscvdpspn vs0, f0
; CHECK-P9-NEXT: vextuhrx r3, r3, v2
; CHECK-P9-NEXT: extsh r3, r3
; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 1
; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 3
; CHECK-P9-NEXT: mtfprwa f0, r3
; CHECK-P9-NEXT: xscvsxdsp f0, f0
; CHECK-P9-NEXT: xscvdpspn vs0, f0
; CHECK-P9-NEXT: xxsldwi v2, vs0, vs0, 1
; CHECK-P9-NEXT: vmrglw v2, v2, v3
; CHECK-P9-NEXT: xxsldwi v2, vs0, vs0, 3
; CHECK-P9-NEXT: vmrghw v2, v2, v3
; CHECK-P9-NEXT: mfvsrld r3, v2
; CHECK-P9-NEXT: blr
;
@ -332,11 +312,10 @@ entry:
define <4 x float> @test4elt_signed(i64 %a.coerce) local_unnamed_addr #1 {
; CHECK-P8-LABEL: test4elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mtfprd f0, r3
; CHECK-P8-NEXT: mtvsrd v2, r3
; CHECK-P8-NEXT: vspltisw v3, 8
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: vmrghh v2, v2, v2
; CHECK-P8-NEXT: vadduwm v3, v3, v3
; CHECK-P8-NEXT: vmrglh v2, v2, v2
; CHECK-P8-NEXT: vslw v2, v2, v3
; CHECK-P8-NEXT: vsraw v2, v2, v3
; CHECK-P8-NEXT: xvcvsxwsp v2, v2
@ -344,9 +323,8 @@ define <4 x float> @test4elt_signed(i64 %a.coerce) local_unnamed_addr #1 {
;
; CHECK-P9-LABEL: test4elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: mtfprd f0, r3
; CHECK-P9-NEXT: xxswapd v2, vs0
; CHECK-P9-NEXT: vmrglh v2, v2, v2
; CHECK-P9-NEXT: mtvsrd v2, r3
; CHECK-P9-NEXT: vmrghh v2, v2, v2
; CHECK-P9-NEXT: vextsh2w v2, v2
; CHECK-P9-NEXT: xvcvsxwsp v2, v2
; CHECK-P9-NEXT: blr

View File

@ -13,11 +13,10 @@ define <2 x double> @test2elt(i32 %a.coerce) local_unnamed_addr #0 {
; CHECK-P8-LABEL: test2elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r4, r2, .LCPI0_0@toc@ha
; CHECK-P8-NEXT: mtfprd f0, r3
; CHECK-P8-NEXT: addi r3, r4, .LCPI0_0@toc@l
; CHECK-P8-NEXT: mtvsrwz v2, r3
; CHECK-P8-NEXT: addi r4, r4, .LCPI0_0@toc@l
; CHECK-P8-NEXT: xxlxor v4, v4, v4
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: lvx v3, 0, r3
; CHECK-P8-NEXT: lvx v3, 0, r4
; CHECK-P8-NEXT: vperm v2, v4, v2, v3
; CHECK-P8-NEXT: xvcvuxddp v2, v2
; CHECK-P8-NEXT: blr
@ -53,19 +52,18 @@ define void @test4elt(<4 x double>* noalias nocapture sret %agg.result, i64 %a.c
; CHECK-P8-LABEL: test4elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r5, r2, .LCPI1_0@toc@ha
; CHECK-P8-NEXT: mtfprd f0, r4
; CHECK-P8-NEXT: addis r4, r2, .LCPI1_1@toc@ha
; CHECK-P8-NEXT: addis r6, r2, .LCPI1_1@toc@ha
; CHECK-P8-NEXT: mtvsrd v2, r4
; CHECK-P8-NEXT: addi r5, r5, .LCPI1_0@toc@l
; CHECK-P8-NEXT: addi r4, r4, .LCPI1_1@toc@l
; CHECK-P8-NEXT: addi r4, r6, .LCPI1_1@toc@l
; CHECK-P8-NEXT: xxlxor v4, v4, v4
; CHECK-P8-NEXT: lvx v2, 0, r5
; CHECK-P8-NEXT: xxswapd v3, vs0
; CHECK-P8-NEXT: lvx v3, 0, r5
; CHECK-P8-NEXT: lvx v5, 0, r4
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: vperm v2, v4, v3, v2
; CHECK-P8-NEXT: vperm v3, v4, v3, v5
; CHECK-P8-NEXT: xvcvuxddp vs0, v2
; CHECK-P8-NEXT: xvcvuxddp vs1, v3
; CHECK-P8-NEXT: vperm v3, v4, v2, v3
; CHECK-P8-NEXT: vperm v2, v4, v2, v5
; CHECK-P8-NEXT: xvcvuxddp vs0, v3
; CHECK-P8-NEXT: xvcvuxddp vs1, v2
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
@ -74,11 +72,10 @@ define void @test4elt(<4 x double>* noalias nocapture sret %agg.result, i64 %a.c
;
; CHECK-P9-LABEL: test4elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: mtfprd f0, r4
; CHECK-P9-NEXT: mtvsrd v2, r4
; CHECK-P9-NEXT: addis r4, r2, .LCPI1_0@toc@ha
; CHECK-P9-NEXT: addi r4, r4, .LCPI1_0@toc@l
; CHECK-P9-NEXT: lxvx v3, 0, r4
; CHECK-P9-NEXT: xxswapd v2, vs0
; CHECK-P9-NEXT: xxlxor v4, v4, v4
; CHECK-P9-NEXT: addis r4, r2, .LCPI1_1@toc@ha
; CHECK-P9-NEXT: addi r4, r4, .LCPI1_1@toc@l
@ -370,14 +367,13 @@ define <2 x double> @test2elt_signed(i32 %a.coerce) local_unnamed_addr #0 {
; CHECK-P8-LABEL: test2elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r4, r2, .LCPI4_0@toc@ha
; CHECK-P8-NEXT: mtfprd f0, r3
; CHECK-P8-NEXT: addi r3, r4, .LCPI4_0@toc@l
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: lvx v3, 0, r3
; CHECK-P8-NEXT: mtvsrwz v3, r3
; CHECK-P8-NEXT: addis r3, r2, .LCPI4_1@toc@ha
; CHECK-P8-NEXT: addi r4, r4, .LCPI4_0@toc@l
; CHECK-P8-NEXT: addi r3, r3, .LCPI4_1@toc@l
; CHECK-P8-NEXT: lvx v2, 0, r4
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: vperm v2, v2, v2, v3
; CHECK-P8-NEXT: vperm v2, v3, v3, v2
; CHECK-P8-NEXT: xxswapd v3, vs0
; CHECK-P8-NEXT: vsld v2, v2, v3
; CHECK-P8-NEXT: vsrad v2, v2, v3
@ -415,17 +411,16 @@ define void @test4elt_signed(<4 x double>* noalias nocapture sret %agg.result, i
; CHECK-P8-LABEL: test4elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r5, r2, .LCPI5_0@toc@ha
; CHECK-P8-NEXT: mtfprd f0, r4
; CHECK-P8-NEXT: addis r4, r2, .LCPI5_2@toc@ha
; CHECK-P8-NEXT: addi r5, r5, .LCPI5_0@toc@l
; CHECK-P8-NEXT: addi r4, r4, .LCPI5_2@toc@l
; CHECK-P8-NEXT: lvx v2, 0, r5
; CHECK-P8-NEXT: xxswapd v3, vs0
; CHECK-P8-NEXT: lvx v4, 0, r4
; CHECK-P8-NEXT: addis r6, r2, .LCPI5_2@toc@ha
; CHECK-P8-NEXT: mtvsrd v3, r4
; CHECK-P8-NEXT: addis r4, r2, .LCPI5_1@toc@ha
; CHECK-P8-NEXT: addi r5, r5, .LCPI5_0@toc@l
; CHECK-P8-NEXT: addi r4, r4, .LCPI5_1@toc@l
; CHECK-P8-NEXT: lvx v2, 0, r5
; CHECK-P8-NEXT: addi r5, r6, .LCPI5_2@toc@l
; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: lvx v4, 0, r5
; CHECK-P8-NEXT: vperm v2, v3, v3, v2
; CHECK-P8-NEXT: vperm v3, v3, v3, v4
; CHECK-P8-NEXT: xxswapd v4, vs0
@ -443,14 +438,13 @@ define void @test4elt_signed(<4 x double>* noalias nocapture sret %agg.result, i
;
; CHECK-P9-LABEL: test4elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: mtfprd f0, r4
; CHECK-P9-NEXT: mtvsrd v2, r4
; CHECK-P9-NEXT: addis r4, r2, .LCPI5_0@toc@ha
; CHECK-P9-NEXT: addi r4, r4, .LCPI5_0@toc@l
; CHECK-P9-NEXT: lxvx v3, 0, r4
; CHECK-P9-NEXT: xxswapd v2, vs0
; CHECK-P9-NEXT: vperm v3, v2, v2, v3
; CHECK-P9-NEXT: addis r4, r2, .LCPI5_1@toc@ha
; CHECK-P9-NEXT: addi r4, r4, .LCPI5_1@toc@l
; CHECK-P9-NEXT: vperm v3, v2, v2, v3
; CHECK-P9-NEXT: vextsh2d v3, v3
; CHECK-P9-NEXT: xvcvsxddp vs0, v3
; CHECK-P9-NEXT: lxvx v3, 0, r4

View File

@ -18,9 +18,9 @@ define i64 @test2elt(<2 x i64> %a) local_unnamed_addr #0 {
; CHECK-P8-NEXT: xscvuxdsp f0, f0
; CHECK-P8-NEXT: xscvdpspn vs1, f1
; CHECK-P8-NEXT: xscvdpspn vs0, f0
; CHECK-P8-NEXT: xxsldwi v3, vs1, vs1, 1
; CHECK-P8-NEXT: xxsldwi v2, vs0, vs0, 1
; CHECK-P8-NEXT: vmrglw v2, v3, v2
; CHECK-P8-NEXT: xxsldwi v3, vs1, vs1, 3
; CHECK-P8-NEXT: xxsldwi v2, vs0, vs0, 3
; CHECK-P8-NEXT: vmrghw v2, v3, v2
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: mffprd r3, f0
; CHECK-P8-NEXT: blr
@ -30,12 +30,12 @@ define i64 @test2elt(<2 x i64> %a) local_unnamed_addr #0 {
; CHECK-P9-NEXT: xxswapd vs0, v2
; CHECK-P9-NEXT: xscvuxdsp f0, f0
; CHECK-P9-NEXT: xscvdpspn vs0, f0
; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 1
; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 3
; CHECK-P9-NEXT: xxlor vs0, v2, v2
; CHECK-P9-NEXT: xscvuxdsp f0, f0
; CHECK-P9-NEXT: xscvdpspn vs0, f0
; CHECK-P9-NEXT: xxsldwi v2, vs0, vs0, 1
; CHECK-P9-NEXT: vmrglw v2, v2, v3
; CHECK-P9-NEXT: xxsldwi v2, vs0, vs0, 3
; CHECK-P9-NEXT: vmrghw v2, v2, v3
; CHECK-P9-NEXT: mfvsrld r3, v2
; CHECK-P9-NEXT: blr
;
@ -311,9 +311,9 @@ define i64 @test2elt_signed(<2 x i64> %a) local_unnamed_addr #0 {
; CHECK-P8-NEXT: xscvsxdsp f0, f0
; CHECK-P8-NEXT: xscvdpspn vs1, f1
; CHECK-P8-NEXT: xscvdpspn vs0, f0
; CHECK-P8-NEXT: xxsldwi v3, vs1, vs1, 1
; CHECK-P8-NEXT: xxsldwi v2, vs0, vs0, 1
; CHECK-P8-NEXT: vmrglw v2, v3, v2
; CHECK-P8-NEXT: xxsldwi v3, vs1, vs1, 3
; CHECK-P8-NEXT: xxsldwi v2, vs0, vs0, 3
; CHECK-P8-NEXT: vmrghw v2, v3, v2
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: mffprd r3, f0
; CHECK-P8-NEXT: blr
@ -323,12 +323,12 @@ define i64 @test2elt_signed(<2 x i64> %a) local_unnamed_addr #0 {
; CHECK-P9-NEXT: xxswapd vs0, v2
; CHECK-P9-NEXT: xscvsxdsp f0, f0
; CHECK-P9-NEXT: xscvdpspn vs0, f0
; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 1
; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 3
; CHECK-P9-NEXT: xxlor vs0, v2, v2
; CHECK-P9-NEXT: xscvsxdsp f0, f0
; CHECK-P9-NEXT: xscvdpspn vs0, f0
; CHECK-P9-NEXT: xxsldwi v2, vs0, vs0, 1
; CHECK-P9-NEXT: vmrglw v2, v2, v3
; CHECK-P9-NEXT: xxsldwi v2, vs0, vs0, 3
; CHECK-P9-NEXT: vmrghw v2, v2, v3
; CHECK-P9-NEXT: mfvsrld r3, v2
; CHECK-P9-NEXT: blr
;

View File

@ -24,9 +24,9 @@ define i64 @test2elt(i16 %a.coerce) local_unnamed_addr #0 {
; CHECK-P8-NEXT: xscvuxdsp f1, f1
; CHECK-P8-NEXT: xscvdpspn vs0, f0
; CHECK-P8-NEXT: xscvdpspn vs1, f1
; CHECK-P8-NEXT: xxsldwi v2, vs0, vs0, 1
; CHECK-P8-NEXT: xxsldwi v3, vs1, vs1, 1
; CHECK-P8-NEXT: vmrglw v2, v3, v2
; CHECK-P8-NEXT: xxsldwi v2, vs0, vs0, 3
; CHECK-P8-NEXT: xxsldwi v3, vs1, vs1, 3
; CHECK-P8-NEXT: vmrghw v2, v3, v2
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: mffprd r3, f0
; CHECK-P8-NEXT: blr
@ -43,12 +43,12 @@ define i64 @test2elt(i16 %a.coerce) local_unnamed_addr #0 {
; CHECK-P9-NEXT: xscvdpspn vs0, f0
; CHECK-P9-NEXT: vextubrx r3, r3, v2
; CHECK-P9-NEXT: clrlwi r3, r3, 24
; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 1
; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 3
; CHECK-P9-NEXT: mtfprwz f0, r3
; CHECK-P9-NEXT: xscvuxdsp f0, f0
; CHECK-P9-NEXT: xscvdpspn vs0, f0
; CHECK-P9-NEXT: xxsldwi v2, vs0, vs0, 1
; CHECK-P9-NEXT: vmrglw v2, v2, v3
; CHECK-P9-NEXT: xxsldwi v2, vs0, vs0, 3
; CHECK-P9-NEXT: vmrghw v2, v2, v3
; CHECK-P9-NEXT: mfvsrld r3, v2
; CHECK-P9-NEXT: blr
;
@ -81,11 +81,10 @@ define <4 x float> @test4elt(i32 %a.coerce) local_unnamed_addr #1 {
; CHECK-P8-LABEL: test4elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r4, r2, .LCPI1_0@toc@ha
; CHECK-P8-NEXT: mtfprd f0, r3
; CHECK-P8-NEXT: addi r3, r4, .LCPI1_0@toc@l
; CHECK-P8-NEXT: mtvsrwz v2, r3
; CHECK-P8-NEXT: addi r4, r4, .LCPI1_0@toc@l
; CHECK-P8-NEXT: xxlxor v4, v4, v4
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: lvx v3, 0, r3
; CHECK-P8-NEXT: lvx v3, 0, r4
; CHECK-P8-NEXT: vperm v2, v4, v2, v3
; CHECK-P8-NEXT: xvcvuxwsp v2, v2
; CHECK-P8-NEXT: blr
@ -121,30 +120,28 @@ define void @test8elt(<8 x float>* noalias nocapture sret %agg.result, i64 %a.co
; CHECK-P8-LABEL: test8elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r5, r2, .LCPI2_0@toc@ha
; CHECK-P8-NEXT: mtfprd f0, r4
; CHECK-P8-NEXT: addis r4, r2, .LCPI2_1@toc@ha
; CHECK-P8-NEXT: addis r6, r2, .LCPI2_1@toc@ha
; CHECK-P8-NEXT: mtvsrd v2, r4
; CHECK-P8-NEXT: addi r5, r5, .LCPI2_0@toc@l
; CHECK-P8-NEXT: addi r4, r4, .LCPI2_1@toc@l
; CHECK-P8-NEXT: addi r4, r6, .LCPI2_1@toc@l
; CHECK-P8-NEXT: xxlxor v4, v4, v4
; CHECK-P8-NEXT: lvx v2, 0, r5
; CHECK-P8-NEXT: xxswapd v3, vs0
; CHECK-P8-NEXT: lvx v3, 0, r5
; CHECK-P8-NEXT: lvx v5, 0, r4
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: vperm v2, v4, v3, v2
; CHECK-P8-NEXT: vperm v3, v4, v3, v5
; CHECK-P8-NEXT: xvcvuxwsp v2, v2
; CHECK-P8-NEXT: vperm v3, v4, v2, v3
; CHECK-P8-NEXT: vperm v2, v4, v2, v5
; CHECK-P8-NEXT: xvcvuxwsp v3, v3
; CHECK-P8-NEXT: stvx v2, 0, r3
; CHECK-P8-NEXT: stvx v3, r3, r4
; CHECK-P8-NEXT: xvcvuxwsp v2, v2
; CHECK-P8-NEXT: stvx v3, 0, r3
; CHECK-P8-NEXT: stvx v2, r3, r4
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: mtfprd f0, r4
; CHECK-P9-NEXT: mtvsrd v2, r4
; CHECK-P9-NEXT: addis r4, r2, .LCPI2_0@toc@ha
; CHECK-P9-NEXT: addi r4, r4, .LCPI2_0@toc@l
; CHECK-P9-NEXT: lxvx v3, 0, r4
; CHECK-P9-NEXT: xxswapd v2, vs0
; CHECK-P9-NEXT: xxlxor v4, v4, v4
; CHECK-P9-NEXT: addis r4, r2, .LCPI2_1@toc@ha
; CHECK-P9-NEXT: addi r4, r4, .LCPI2_1@toc@l
@ -292,9 +289,9 @@ define i64 @test2elt_signed(i16 %a.coerce) local_unnamed_addr #0 {
; CHECK-P8-NEXT: xscvsxdsp f1, f1
; CHECK-P8-NEXT: xscvdpspn vs0, f0
; CHECK-P8-NEXT: xscvdpspn vs1, f1
; CHECK-P8-NEXT: xxsldwi v2, vs0, vs0, 1
; CHECK-P8-NEXT: xxsldwi v3, vs1, vs1, 1
; CHECK-P8-NEXT: vmrglw v2, v3, v2
; CHECK-P8-NEXT: xxsldwi v2, vs0, vs0, 3
; CHECK-P8-NEXT: xxsldwi v3, vs1, vs1, 3
; CHECK-P8-NEXT: vmrghw v2, v3, v2
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: mffprd r3, f0
; CHECK-P8-NEXT: blr
@ -311,12 +308,12 @@ define i64 @test2elt_signed(i16 %a.coerce) local_unnamed_addr #0 {
; CHECK-P9-NEXT: xscvdpspn vs0, f0
; CHECK-P9-NEXT: vextubrx r3, r3, v2
; CHECK-P9-NEXT: extsb r3, r3
; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 1
; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 3
; CHECK-P9-NEXT: mtfprwa f0, r3
; CHECK-P9-NEXT: xscvsxdsp f0, f0
; CHECK-P9-NEXT: xscvdpspn vs0, f0
; CHECK-P9-NEXT: xxsldwi v2, vs0, vs0, 1
; CHECK-P9-NEXT: vmrglw v2, v2, v3
; CHECK-P9-NEXT: xxsldwi v2, vs0, vs0, 3
; CHECK-P9-NEXT: vmrghw v2, v2, v3
; CHECK-P9-NEXT: mfvsrld r3, v2
; CHECK-P9-NEXT: blr
;
@ -349,11 +346,10 @@ define <4 x float> @test4elt_signed(i32 %a.coerce) local_unnamed_addr #1 {
; CHECK-P8-LABEL: test4elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r4, r2, .LCPI5_0@toc@ha
; CHECK-P8-NEXT: mtfprd f0, r3
; CHECK-P8-NEXT: addi r3, r4, .LCPI5_0@toc@l
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: lvx v3, 0, r3
; CHECK-P8-NEXT: vperm v2, v2, v2, v3
; CHECK-P8-NEXT: mtvsrwz v3, r3
; CHECK-P8-NEXT: addi r4, r4, .LCPI5_0@toc@l
; CHECK-P8-NEXT: lvx v2, 0, r4
; CHECK-P8-NEXT: vperm v2, v3, v3, v2
; CHECK-P8-NEXT: vspltisw v3, 12
; CHECK-P8-NEXT: vadduwm v3, v3, v3
; CHECK-P8-NEXT: vslw v2, v2, v3
@ -392,15 +388,14 @@ define void @test8elt_signed(<8 x float>* noalias nocapture sret %agg.result, i6
; CHECK-P8-LABEL: test8elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r5, r2, .LCPI6_0@toc@ha
; CHECK-P8-NEXT: mtfprd f0, r4
; CHECK-P8-NEXT: addis r4, r2, .LCPI6_1@toc@ha
; CHECK-P8-NEXT: addis r6, r2, .LCPI6_1@toc@ha
; CHECK-P8-NEXT: mtvsrd v3, r4
; CHECK-P8-NEXT: vspltisw v5, 12
; CHECK-P8-NEXT: addi r5, r5, .LCPI6_0@toc@l
; CHECK-P8-NEXT: addi r4, r4, .LCPI6_1@toc@l
; CHECK-P8-NEXT: lvx v2, 0, r5
; CHECK-P8-NEXT: xxswapd v3, vs0
; CHECK-P8-NEXT: lvx v4, 0, r4
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: addi r5, r5, .LCPI6_0@toc@l
; CHECK-P8-NEXT: lvx v2, 0, r5
; CHECK-P8-NEXT: addi r5, r6, .LCPI6_1@toc@l
; CHECK-P8-NEXT: lvx v4, 0, r5
; CHECK-P8-NEXT: vperm v2, v3, v3, v2
; CHECK-P8-NEXT: vperm v3, v3, v3, v4
; CHECK-P8-NEXT: vadduwm v4, v5, v5
@ -416,14 +411,13 @@ define void @test8elt_signed(<8 x float>* noalias nocapture sret %agg.result, i6
;
; CHECK-P9-LABEL: test8elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: mtfprd f0, r4
; CHECK-P9-NEXT: mtvsrd v2, r4
; CHECK-P9-NEXT: addis r4, r2, .LCPI6_0@toc@ha
; CHECK-P9-NEXT: addi r4, r4, .LCPI6_0@toc@l
; CHECK-P9-NEXT: lxvx v3, 0, r4
; CHECK-P9-NEXT: xxswapd v2, vs0
; CHECK-P9-NEXT: vperm v3, v2, v2, v3
; CHECK-P9-NEXT: addis r4, r2, .LCPI6_1@toc@ha
; CHECK-P9-NEXT: addi r4, r4, .LCPI6_1@toc@l
; CHECK-P9-NEXT: vperm v3, v2, v2, v3
; CHECK-P9-NEXT: vextsb2w v3, v3
; CHECK-P9-NEXT: xvcvsxwsp vs0, v3
; CHECK-P9-NEXT: lxvx v3, 0, r4

View File

@ -13,11 +13,10 @@ define <2 x double> @test2elt(i16 %a.coerce) local_unnamed_addr #0 {
; CHECK-P8-LABEL: test2elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r4, r2, .LCPI0_0@toc@ha
; CHECK-P8-NEXT: mtfprd f0, r3
; CHECK-P8-NEXT: addi r3, r4, .LCPI0_0@toc@l
; CHECK-P8-NEXT: mtvsrwz v2, r3
; CHECK-P8-NEXT: addi r4, r4, .LCPI0_0@toc@l
; CHECK-P8-NEXT: xxlxor v4, v4, v4
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: lvx v3, 0, r3
; CHECK-P8-NEXT: lvx v3, 0, r4
; CHECK-P8-NEXT: vperm v2, v4, v2, v3
; CHECK-P8-NEXT: xvcvuxddp v2, v2
; CHECK-P8-NEXT: blr
@ -53,19 +52,18 @@ define void @test4elt(<4 x double>* noalias nocapture sret %agg.result, i32 %a.c
; CHECK-P8-LABEL: test4elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r5, r2, .LCPI1_0@toc@ha
; CHECK-P8-NEXT: mtfprd f0, r4
; CHECK-P8-NEXT: addis r4, r2, .LCPI1_1@toc@ha
; CHECK-P8-NEXT: addis r6, r2, .LCPI1_1@toc@ha
; CHECK-P8-NEXT: mtvsrwz v2, r4
; CHECK-P8-NEXT: addi r5, r5, .LCPI1_0@toc@l
; CHECK-P8-NEXT: addi r4, r4, .LCPI1_1@toc@l
; CHECK-P8-NEXT: addi r4, r6, .LCPI1_1@toc@l
; CHECK-P8-NEXT: xxlxor v4, v4, v4
; CHECK-P8-NEXT: lvx v2, 0, r5
; CHECK-P8-NEXT: xxswapd v3, vs0
; CHECK-P8-NEXT: lvx v3, 0, r5
; CHECK-P8-NEXT: lvx v5, 0, r4
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: vperm v2, v4, v3, v2
; CHECK-P8-NEXT: vperm v3, v4, v3, v5
; CHECK-P8-NEXT: xvcvuxddp vs0, v2
; CHECK-P8-NEXT: xvcvuxddp vs1, v3
; CHECK-P8-NEXT: vperm v3, v4, v2, v3
; CHECK-P8-NEXT: vperm v2, v4, v2, v5
; CHECK-P8-NEXT: xvcvuxddp vs0, v3
; CHECK-P8-NEXT: xvcvuxddp vs1, v2
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
@ -118,33 +116,32 @@ define void @test8elt(<8 x double>* noalias nocapture sret %agg.result, i64 %a.c
; CHECK-P8-LABEL: test8elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r5, r2, .LCPI2_0@toc@ha
; CHECK-P8-NEXT: mtfprd f0, r4
; CHECK-P8-NEXT: addis r4, r2, .LCPI2_2@toc@ha
; CHECK-P8-NEXT: addis r6, r2, .LCPI2_2@toc@ha
; CHECK-P8-NEXT: mtvsrd v2, r4
; CHECK-P8-NEXT: addis r4, r2, .LCPI2_3@toc@ha
; CHECK-P8-NEXT: addi r5, r5, .LCPI2_0@toc@l
; CHECK-P8-NEXT: addi r4, r4, .LCPI2_2@toc@l
; CHECK-P8-NEXT: addi r4, r4, .LCPI2_3@toc@l
; CHECK-P8-NEXT: xxlxor v4, v4, v4
; CHECK-P8-NEXT: lvx v2, 0, r5
; CHECK-P8-NEXT: addis r5, r2, .LCPI2_3@toc@ha
; CHECK-P8-NEXT: lvx v5, 0, r4
; CHECK-P8-NEXT: addis r4, r2, .LCPI2_1@toc@ha
; CHECK-P8-NEXT: xxswapd v3, vs0
; CHECK-P8-NEXT: addi r5, r5, .LCPI2_3@toc@l
; CHECK-P8-NEXT: addi r4, r4, .LCPI2_1@toc@l
; CHECK-P8-NEXT: lvx v0, 0, r5
; CHECK-P8-NEXT: lvx v1, 0, r4
; CHECK-P8-NEXT: lvx v3, 0, r5
; CHECK-P8-NEXT: addi r5, r6, .LCPI2_2@toc@l
; CHECK-P8-NEXT: lvx v0, 0, r4
; CHECK-P8-NEXT: li r4, 48
; CHECK-P8-NEXT: lvx v5, 0, r5
; CHECK-P8-NEXT: addis r5, r2, .LCPI2_1@toc@ha
; CHECK-P8-NEXT: addi r5, r5, .LCPI2_1@toc@l
; CHECK-P8-NEXT: lvx v1, 0, r5
; CHECK-P8-NEXT: vperm v0, v4, v2, v0
; CHECK-P8-NEXT: li r5, 32
; CHECK-P8-NEXT: vperm v2, v4, v3, v2
; CHECK-P8-NEXT: vperm v5, v4, v3, v5
; CHECK-P8-NEXT: vperm v0, v4, v3, v0
; CHECK-P8-NEXT: vperm v3, v4, v3, v1
; CHECK-P8-NEXT: xvcvuxddp vs0, v2
; CHECK-P8-NEXT: xvcvuxddp vs1, v5
; CHECK-P8-NEXT: vperm v3, v4, v2, v3
; CHECK-P8-NEXT: vperm v5, v4, v2, v5
; CHECK-P8-NEXT: vperm v2, v4, v2, v1
; CHECK-P8-NEXT: xvcvuxddp vs2, v0
; CHECK-P8-NEXT: xvcvuxddp vs3, v3
; CHECK-P8-NEXT: xvcvuxddp vs0, v3
; CHECK-P8-NEXT: xvcvuxddp vs1, v5
; CHECK-P8-NEXT: xvcvuxddp vs3, v2
; CHECK-P8-NEXT: xxswapd vs2, vs2
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: xxswapd vs2, vs2
; CHECK-P8-NEXT: xxswapd vs3, vs3
; CHECK-P8-NEXT: stxvd2x vs2, r3, r4
; CHECK-P8-NEXT: li r4, 16
@ -155,11 +152,10 @@ define void @test8elt(<8 x double>* noalias nocapture sret %agg.result, i64 %a.c
;
; CHECK-P9-LABEL: test8elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: mtfprd f0, r4
; CHECK-P9-NEXT: mtvsrd v2, r4
; CHECK-P9-NEXT: addis r4, r2, .LCPI2_0@toc@ha
; CHECK-P9-NEXT: addi r4, r4, .LCPI2_0@toc@l
; CHECK-P9-NEXT: lxvx v3, 0, r4
; CHECK-P9-NEXT: xxswapd v2, vs0
; CHECK-P9-NEXT: xxlxor v4, v4, v4
; CHECK-P9-NEXT: addis r4, r2, .LCPI2_1@toc@ha
; CHECK-P9-NEXT: addi r4, r4, .LCPI2_1@toc@l
@ -404,14 +400,13 @@ define <2 x double> @test2elt_signed(i16 %a.coerce) local_unnamed_addr #0 {
; CHECK-P8-LABEL: test2elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r4, r2, .LCPI4_0@toc@ha
; CHECK-P8-NEXT: mtfprd f0, r3
; CHECK-P8-NEXT: addi r3, r4, .LCPI4_0@toc@l
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: lvx v3, 0, r3
; CHECK-P8-NEXT: mtvsrwz v3, r3
; CHECK-P8-NEXT: addis r3, r2, .LCPI4_1@toc@ha
; CHECK-P8-NEXT: addi r4, r4, .LCPI4_0@toc@l
; CHECK-P8-NEXT: addi r3, r3, .LCPI4_1@toc@l
; CHECK-P8-NEXT: lvx v2, 0, r4
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: vperm v2, v2, v2, v3
; CHECK-P8-NEXT: vperm v2, v3, v3, v2
; CHECK-P8-NEXT: xxswapd v3, vs0
; CHECK-P8-NEXT: vsld v2, v2, v3
; CHECK-P8-NEXT: vsrad v2, v2, v3
@ -449,17 +444,16 @@ define void @test4elt_signed(<4 x double>* noalias nocapture sret %agg.result, i
; CHECK-P8-LABEL: test4elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r5, r2, .LCPI5_0@toc@ha
; CHECK-P8-NEXT: mtfprd f0, r4
; CHECK-P8-NEXT: addis r4, r2, .LCPI5_2@toc@ha
; CHECK-P8-NEXT: addi r5, r5, .LCPI5_0@toc@l
; CHECK-P8-NEXT: addi r4, r4, .LCPI5_2@toc@l
; CHECK-P8-NEXT: lvx v2, 0, r5
; CHECK-P8-NEXT: xxswapd v3, vs0
; CHECK-P8-NEXT: lvx v4, 0, r4
; CHECK-P8-NEXT: addis r6, r2, .LCPI5_2@toc@ha
; CHECK-P8-NEXT: mtvsrwz v3, r4
; CHECK-P8-NEXT: addis r4, r2, .LCPI5_1@toc@ha
; CHECK-P8-NEXT: addi r5, r5, .LCPI5_0@toc@l
; CHECK-P8-NEXT: addi r4, r4, .LCPI5_1@toc@l
; CHECK-P8-NEXT: lvx v2, 0, r5
; CHECK-P8-NEXT: addi r5, r6, .LCPI5_2@toc@l
; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: lvx v4, 0, r5
; CHECK-P8-NEXT: vperm v2, v3, v3, v2
; CHECK-P8-NEXT: vperm v3, v3, v3, v4
; CHECK-P8-NEXT: xxswapd v4, vs0
@ -523,26 +517,25 @@ entry:
define void @test8elt_signed(<8 x double>* noalias nocapture sret %agg.result, i64 %a.coerce) local_unnamed_addr #1 {
; CHECK-P8-LABEL: test8elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mtfprd f0, r4
; CHECK-P8-NEXT: addis r4, r2, .LCPI6_2@toc@ha
; CHECK-P8-NEXT: addis r5, r2, .LCPI6_0@toc@ha
; CHECK-P8-NEXT: addis r6, r2, .LCPI6_3@toc@ha
; CHECK-P8-NEXT: addi r4, r4, .LCPI6_2@toc@l
; CHECK-P8-NEXT: addis r6, r2, .LCPI6_2@toc@ha
; CHECK-P8-NEXT: mtvsrd v3, r4
; CHECK-P8-NEXT: addis r4, r2, .LCPI6_1@toc@ha
; CHECK-P8-NEXT: addi r5, r5, .LCPI6_0@toc@l
; CHECK-P8-NEXT: addi r6, r6, .LCPI6_3@toc@l
; CHECK-P8-NEXT: lvx v4, 0, r4
; CHECK-P8-NEXT: addis r4, r2, .LCPI6_4@toc@ha
; CHECK-P8-NEXT: addi r6, r6, .LCPI6_2@toc@l
; CHECK-P8-NEXT: addi r4, r4, .LCPI6_1@toc@l
; CHECK-P8-NEXT: lvx v2, 0, r5
; CHECK-P8-NEXT: xxswapd v3, vs0
; CHECK-P8-NEXT: lvx v5, 0, r6
; CHECK-P8-NEXT: addis r5, r2, .LCPI6_1@toc@ha
; CHECK-P8-NEXT: addi r4, r4, .LCPI6_4@toc@l
; CHECK-P8-NEXT: addi r5, r5, .LCPI6_1@toc@l
; CHECK-P8-NEXT: lvx v0, 0, r4
; CHECK-P8-NEXT: lxvd2x vs0, 0, r5
; CHECK-P8-NEXT: addis r5, r2, .LCPI6_3@toc@ha
; CHECK-P8-NEXT: lvx v4, 0, r6
; CHECK-P8-NEXT: addis r6, r2, .LCPI6_4@toc@ha
; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-P8-NEXT: li r4, 48
; CHECK-P8-NEXT: li r5, 32
; CHECK-P8-NEXT: addi r5, r5, .LCPI6_3@toc@l
; CHECK-P8-NEXT: lvx v5, 0, r5
; CHECK-P8-NEXT: addi r5, r6, .LCPI6_4@toc@l
; CHECK-P8-NEXT: lvx v0, 0, r5
; CHECK-P8-NEXT: vperm v2, v3, v3, v2
; CHECK-P8-NEXT: li r5, 32
; CHECK-P8-NEXT: vperm v4, v3, v3, v4
; CHECK-P8-NEXT: vperm v5, v3, v3, v5
; CHECK-P8-NEXT: vperm v3, v3, v3, v0
@ -572,14 +565,13 @@ define void @test8elt_signed(<8 x double>* noalias nocapture sret %agg.result, i
;
; CHECK-P9-LABEL: test8elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: mtfprd f0, r4
; CHECK-P9-NEXT: mtvsrd v2, r4
; CHECK-P9-NEXT: addis r4, r2, .LCPI6_0@toc@ha
; CHECK-P9-NEXT: addi r4, r4, .LCPI6_0@toc@l
; CHECK-P9-NEXT: lxvx v3, 0, r4
; CHECK-P9-NEXT: xxswapd v2, vs0
; CHECK-P9-NEXT: vperm v3, v2, v2, v3
; CHECK-P9-NEXT: addis r4, r2, .LCPI6_1@toc@ha
; CHECK-P9-NEXT: addi r4, r4, .LCPI6_1@toc@l
; CHECK-P9-NEXT: vperm v3, v2, v2, v3
; CHECK-P9-NEXT: vextsb2d v3, v3
; CHECK-P9-NEXT: xvcvsxddp vs0, v3
; CHECK-P9-NEXT: lxvx v3, 0, r4

View File

@ -82,10 +82,10 @@ define <3 x float> @constrained_vector_fdiv_v3f32() #0 {
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: xscvdpspn 2, 2
; PC64LE-NEXT: xscvdpspn 0, 0
; PC64LE-NEXT: xxsldwi 34, 1, 1, 1
; PC64LE-NEXT: xxsldwi 35, 2, 2, 1
; PC64LE-NEXT: vmrglw 2, 3, 2
; PC64LE-NEXT: xxsldwi 35, 0, 0, 1
; PC64LE-NEXT: xxsldwi 34, 1, 1, 3
; PC64LE-NEXT: xxsldwi 35, 2, 2, 3
; PC64LE-NEXT: vmrghw 2, 3, 2
; PC64LE-NEXT: xxsldwi 35, 0, 0, 3
; PC64LE-NEXT: vperm 2, 3, 2, 4
; PC64LE-NEXT: blr
;
@ -106,12 +106,12 @@ define <3 x float> @constrained_vector_fdiv_v3f32() #0 {
; PC64LE9-NEXT: xsdivsp 2, 2, 0
; PC64LE9-NEXT: xsdivsp 0, 3, 0
; PC64LE9-NEXT: xscvdpspn 0, 0
; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3
; PC64LE9-NEXT: xscvdpspn 0, 2
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3
; PC64LE9-NEXT: xscvdpspn 0, 1
; PC64LE9-NEXT: vmrglw 2, 3, 2
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1
; PC64LE9-NEXT: vmrghw 2, 3, 2
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3
; PC64LE9-NEXT: vperm 2, 3, 2, 4
; PC64LE9-NEXT: blr
entry:
@ -359,11 +359,11 @@ define <3 x float> @constrained_vector_frem_v3f32() #0 {
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: addi 3, 3, .LCPI7_4@toc@l
; PC64LE-NEXT: lvx 4, 0, 3
; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
; PC64LE-NEXT: xxsldwi 34, 0, 0, 3
; PC64LE-NEXT: xscvdpspn 0, 30
; PC64LE-NEXT: xxsldwi 35, 1, 1, 1
; PC64LE-NEXT: vmrglw 2, 2, 3
; PC64LE-NEXT: xxsldwi 35, 0, 0, 1
; PC64LE-NEXT: xxsldwi 35, 1, 1, 3
; PC64LE-NEXT: vmrghw 2, 2, 3
; PC64LE-NEXT: xxsldwi 35, 0, 0, 3
; PC64LE-NEXT: vperm 2, 3, 2, 4
; PC64LE-NEXT: addi 1, 1, 64
; PC64LE-NEXT: ld 0, 16(1)
@ -401,15 +401,15 @@ define <3 x float> @constrained_vector_frem_v3f32() #0 {
; PC64LE9-NEXT: bl fmodf
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: xscvdpspn 0, 1
; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3
; PC64LE9-NEXT: xscvdpspn 0, 29
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3
; PC64LE9-NEXT: xscvdpspn 0, 30
; PC64LE9-NEXT: addis 3, 2, .LCPI7_4@toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI7_4@toc@l
; PC64LE9-NEXT: lxvx 36, 0, 3
; PC64LE9-NEXT: vmrglw 2, 3, 2
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1
; PC64LE9-NEXT: vmrghw 2, 3, 2
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3
; PC64LE9-NEXT: vperm 2, 3, 2, 4
; PC64LE9-NEXT: addi 1, 1, 64
; PC64LE9-NEXT: ld 0, 16(1)
@ -710,10 +710,10 @@ define <3 x float> @constrained_vector_fmul_v3f32() #0 {
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: xscvdpspn 2, 2
; PC64LE-NEXT: xscvdpspn 0, 0
; PC64LE-NEXT: xxsldwi 34, 1, 1, 1
; PC64LE-NEXT: xxsldwi 35, 2, 2, 1
; PC64LE-NEXT: vmrglw 2, 3, 2
; PC64LE-NEXT: xxsldwi 35, 0, 0, 1
; PC64LE-NEXT: xxsldwi 34, 1, 1, 3
; PC64LE-NEXT: xxsldwi 35, 2, 2, 3
; PC64LE-NEXT: vmrghw 2, 3, 2
; PC64LE-NEXT: xxsldwi 35, 0, 0, 3
; PC64LE-NEXT: vperm 2, 3, 2, 4
; PC64LE-NEXT: blr
;
@ -735,11 +735,11 @@ define <3 x float> @constrained_vector_fmul_v3f32() #0 {
; PC64LE9-NEXT: xsmulsp 1, 1, 3
; PC64LE9-NEXT: xscvdpspn 0, 0
; PC64LE9-NEXT: xscvdpspn 1, 1
; PC64LE9-NEXT: xxsldwi 34, 1, 1, 1
; PC64LE9-NEXT: xxsldwi 34, 1, 1, 3
; PC64LE9-NEXT: xscvdpspn 1, 2
; PC64LE9-NEXT: xxsldwi 35, 1, 1, 1
; PC64LE9-NEXT: vmrglw 2, 3, 2
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 35, 1, 1, 3
; PC64LE9-NEXT: vmrghw 2, 3, 2
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3
; PC64LE9-NEXT: vperm 2, 3, 2, 4
; PC64LE9-NEXT: blr
entry:
@ -925,10 +925,10 @@ define <3 x float> @constrained_vector_fadd_v3f32() #0 {
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: xscvdpspn 2, 2
; PC64LE-NEXT: xscvdpspn 0, 0
; PC64LE-NEXT: xxsldwi 34, 1, 1, 1
; PC64LE-NEXT: xxsldwi 35, 2, 2, 1
; PC64LE-NEXT: vmrglw 2, 3, 2
; PC64LE-NEXT: xxsldwi 35, 0, 0, 1
; PC64LE-NEXT: xxsldwi 34, 1, 1, 3
; PC64LE-NEXT: xxsldwi 35, 2, 2, 3
; PC64LE-NEXT: vmrghw 2, 3, 2
; PC64LE-NEXT: xxsldwi 35, 0, 0, 3
; PC64LE-NEXT: vperm 2, 3, 2, 4
; PC64LE-NEXT: blr
;
@ -945,15 +945,15 @@ define <3 x float> @constrained_vector_fadd_v3f32() #0 {
; PC64LE9-NEXT: xsaddsp 1, 0, 1
; PC64LE9-NEXT: xsaddsp 0, 0, 3
; PC64LE9-NEXT: xscvdpspn 0, 0
; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3
; PC64LE9-NEXT: xscvdpspn 0, 2
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3
; PC64LE9-NEXT: xscvdpspn 0, 1
; PC64LE9-NEXT: addis 3, 2, .LCPI17_3@toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI17_3@toc@l
; PC64LE9-NEXT: lxvx 36, 0, 3
; PC64LE9-NEXT: vmrglw 2, 3, 2
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1
; PC64LE9-NEXT: vmrghw 2, 3, 2
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3
; PC64LE9-NEXT: vperm 2, 3, 2, 4
; PC64LE9-NEXT: blr
entry:
@ -1137,10 +1137,10 @@ define <3 x float> @constrained_vector_fsub_v3f32() #0 {
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: xscvdpspn 2, 2
; PC64LE-NEXT: xscvdpspn 0, 0
; PC64LE-NEXT: xxsldwi 34, 1, 1, 1
; PC64LE-NEXT: xxsldwi 35, 2, 2, 1
; PC64LE-NEXT: vmrglw 2, 3, 2
; PC64LE-NEXT: xxsldwi 35, 0, 0, 1
; PC64LE-NEXT: xxsldwi 34, 1, 1, 3
; PC64LE-NEXT: xxsldwi 35, 2, 2, 3
; PC64LE-NEXT: vmrghw 2, 3, 2
; PC64LE-NEXT: xxsldwi 35, 0, 0, 3
; PC64LE-NEXT: vperm 2, 3, 2, 4
; PC64LE-NEXT: blr
;
@ -1157,15 +1157,15 @@ define <3 x float> @constrained_vector_fsub_v3f32() #0 {
; PC64LE9-NEXT: xssubsp 1, 0, 1
; PC64LE9-NEXT: xssubsp 0, 0, 3
; PC64LE9-NEXT: xscvdpspn 0, 0
; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3
; PC64LE9-NEXT: xscvdpspn 0, 2
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3
; PC64LE9-NEXT: xscvdpspn 0, 1
; PC64LE9-NEXT: addis 3, 2, .LCPI22_3@toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI22_3@toc@l
; PC64LE9-NEXT: lxvx 36, 0, 3
; PC64LE9-NEXT: vmrglw 2, 3, 2
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1
; PC64LE9-NEXT: vmrghw 2, 3, 2
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3
; PC64LE9-NEXT: vperm 2, 3, 2, 4
; PC64LE9-NEXT: blr
entry:
@ -1333,12 +1333,12 @@ define <3 x float> @constrained_vector_sqrt_v3f32() #0 {
; PC64LE-NEXT: xssqrtsp 2, 2
; PC64LE-NEXT: xscvdpspn 0, 0
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
; PC64LE-NEXT: xxsldwi 34, 0, 0, 3
; PC64LE-NEXT: xscvdpspn 0, 2
; PC64LE-NEXT: xxsldwi 35, 1, 1, 1
; PC64LE-NEXT: vmrglw 2, 3, 2
; PC64LE-NEXT: xxsldwi 35, 1, 1, 3
; PC64LE-NEXT: vmrghw 2, 3, 2
; PC64LE-NEXT: lvx 3, 0, 3
; PC64LE-NEXT: xxsldwi 36, 0, 0, 1
; PC64LE-NEXT: xxsldwi 36, 0, 0, 3
; PC64LE-NEXT: vperm 2, 4, 2, 3
; PC64LE-NEXT: blr
;
@ -1358,10 +1358,10 @@ define <3 x float> @constrained_vector_sqrt_v3f32() #0 {
; PC64LE9-NEXT: xscvdpspn 0, 0
; PC64LE9-NEXT: xscvdpspn 1, 1
; PC64LE9-NEXT: xscvdpspn 2, 2
; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 35, 1, 1, 1
; PC64LE9-NEXT: xxsldwi 34, 2, 2, 1
; PC64LE9-NEXT: vmrglw 2, 3, 2
; PC64LE9-NEXT: xxsldwi 36, 0, 0, 3
; PC64LE9-NEXT: xxsldwi 35, 1, 1, 3
; PC64LE9-NEXT: xxsldwi 34, 2, 2, 3
; PC64LE9-NEXT: vmrghw 2, 3, 2
; PC64LE9-NEXT: lxvx 35, 0, 3
; PC64LE9-NEXT: vperm 2, 4, 2, 3
; PC64LE9-NEXT: blr
@ -1588,11 +1588,11 @@ define <3 x float> @constrained_vector_pow_v3f32() #0 {
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: addi 3, 3, .LCPI32_4@toc@l
; PC64LE-NEXT: lvx 4, 0, 3
; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
; PC64LE-NEXT: xxsldwi 34, 0, 0, 3
; PC64LE-NEXT: xscvdpspn 0, 30
; PC64LE-NEXT: xxsldwi 35, 1, 1, 1
; PC64LE-NEXT: vmrglw 2, 2, 3
; PC64LE-NEXT: xxsldwi 35, 0, 0, 1
; PC64LE-NEXT: xxsldwi 35, 1, 1, 3
; PC64LE-NEXT: vmrghw 2, 2, 3
; PC64LE-NEXT: xxsldwi 35, 0, 0, 3
; PC64LE-NEXT: vperm 2, 3, 2, 4
; PC64LE-NEXT: addi 1, 1, 64
; PC64LE-NEXT: ld 0, 16(1)
@ -1630,15 +1630,15 @@ define <3 x float> @constrained_vector_pow_v3f32() #0 {
; PC64LE9-NEXT: bl powf
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: xscvdpspn 0, 1
; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3
; PC64LE9-NEXT: xscvdpspn 0, 29
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3
; PC64LE9-NEXT: xscvdpspn 0, 30
; PC64LE9-NEXT: addis 3, 2, .LCPI32_4@toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI32_4@toc@l
; PC64LE9-NEXT: lxvx 36, 0, 3
; PC64LE9-NEXT: vmrglw 2, 3, 2
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1
; PC64LE9-NEXT: vmrghw 2, 3, 2
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3
; PC64LE9-NEXT: vperm 2, 3, 2, 4
; PC64LE9-NEXT: addi 1, 1, 64
; PC64LE9-NEXT: ld 0, 16(1)
@ -1992,11 +1992,11 @@ define <3 x float> @constrained_vector_powi_v3f32() #0 {
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: addi 3, 3, .LCPI37_3@toc@l
; PC64LE-NEXT: lvx 4, 0, 3
; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
; PC64LE-NEXT: xxsldwi 34, 0, 0, 3
; PC64LE-NEXT: xscvdpspn 0, 31
; PC64LE-NEXT: xxsldwi 35, 1, 1, 1
; PC64LE-NEXT: vmrglw 2, 2, 3
; PC64LE-NEXT: xxsldwi 35, 0, 0, 1
; PC64LE-NEXT: xxsldwi 35, 1, 1, 3
; PC64LE-NEXT: vmrghw 2, 2, 3
; PC64LE-NEXT: xxsldwi 35, 0, 0, 3
; PC64LE-NEXT: vperm 2, 3, 2, 4
; PC64LE-NEXT: addi 1, 1, 48
; PC64LE-NEXT: ld 0, 16(1)
@ -2030,15 +2030,15 @@ define <3 x float> @constrained_vector_powi_v3f32() #0 {
; PC64LE9-NEXT: bl __powisf2
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: xscvdpspn 0, 1
; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3
; PC64LE9-NEXT: xscvdpspn 0, 30
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3
; PC64LE9-NEXT: xscvdpspn 0, 31
; PC64LE9-NEXT: addis 3, 2, .LCPI37_3@toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI37_3@toc@l
; PC64LE9-NEXT: lxvx 36, 0, 3
; PC64LE9-NEXT: vmrglw 2, 3, 2
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1
; PC64LE9-NEXT: vmrghw 2, 3, 2
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3
; PC64LE9-NEXT: vperm 2, 3, 2, 4
; PC64LE9-NEXT: addi 1, 1, 48
; PC64LE9-NEXT: ld 0, 16(1)
@ -2360,12 +2360,12 @@ define <3 x float> @constrained_vector_sin_v3f32() #0 {
; PC64LE-NEXT: addis 3, 2, .LCPI42_3@toc@ha
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: addi 3, 3, .LCPI42_3@toc@l
; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
; PC64LE-NEXT: xxsldwi 34, 0, 0, 3
; PC64LE-NEXT: xscvdpspn 0, 31
; PC64LE-NEXT: xxsldwi 35, 1, 1, 1
; PC64LE-NEXT: vmrglw 2, 2, 3
; PC64LE-NEXT: xxsldwi 35, 1, 1, 3
; PC64LE-NEXT: vmrghw 2, 2, 3
; PC64LE-NEXT: lvx 3, 0, 3
; PC64LE-NEXT: xxsldwi 36, 0, 0, 1
; PC64LE-NEXT: xxsldwi 36, 0, 0, 3
; PC64LE-NEXT: vperm 2, 4, 2, 3
; PC64LE-NEXT: addi 1, 1, 48
; PC64LE-NEXT: ld 0, 16(1)
@ -2396,15 +2396,15 @@ define <3 x float> @constrained_vector_sin_v3f32() #0 {
; PC64LE9-NEXT: bl sinf
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: xscvdpspn 0, 1
; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3
; PC64LE9-NEXT: xscvdpspn 0, 30
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3
; PC64LE9-NEXT: xscvdpspn 0, 31
; PC64LE9-NEXT: addis 3, 2, .LCPI42_3@toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI42_3@toc@l
; PC64LE9-NEXT: vmrglw 2, 3, 2
; PC64LE9-NEXT: vmrghw 2, 3, 2
; PC64LE9-NEXT: lxvx 35, 0, 3
; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 36, 0, 0, 3
; PC64LE9-NEXT: vperm 2, 4, 2, 3
; PC64LE9-NEXT: addi 1, 1, 48
; PC64LE9-NEXT: ld 0, 16(1)
@ -2709,12 +2709,12 @@ define <3 x float> @constrained_vector_cos_v3f32() #0 {
; PC64LE-NEXT: addis 3, 2, .LCPI47_3@toc@ha
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: addi 3, 3, .LCPI47_3@toc@l
; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
; PC64LE-NEXT: xxsldwi 34, 0, 0, 3
; PC64LE-NEXT: xscvdpspn 0, 31
; PC64LE-NEXT: xxsldwi 35, 1, 1, 1
; PC64LE-NEXT: vmrglw 2, 2, 3
; PC64LE-NEXT: xxsldwi 35, 1, 1, 3
; PC64LE-NEXT: vmrghw 2, 2, 3
; PC64LE-NEXT: lvx 3, 0, 3
; PC64LE-NEXT: xxsldwi 36, 0, 0, 1
; PC64LE-NEXT: xxsldwi 36, 0, 0, 3
; PC64LE-NEXT: vperm 2, 4, 2, 3
; PC64LE-NEXT: addi 1, 1, 48
; PC64LE-NEXT: ld 0, 16(1)
@ -2745,15 +2745,15 @@ define <3 x float> @constrained_vector_cos_v3f32() #0 {
; PC64LE9-NEXT: bl cosf
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: xscvdpspn 0, 1
; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3
; PC64LE9-NEXT: xscvdpspn 0, 30
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3
; PC64LE9-NEXT: xscvdpspn 0, 31
; PC64LE9-NEXT: addis 3, 2, .LCPI47_3@toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI47_3@toc@l
; PC64LE9-NEXT: vmrglw 2, 3, 2
; PC64LE9-NEXT: vmrghw 2, 3, 2
; PC64LE9-NEXT: lxvx 35, 0, 3
; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 36, 0, 0, 3
; PC64LE9-NEXT: vperm 2, 4, 2, 3
; PC64LE9-NEXT: addi 1, 1, 48
; PC64LE9-NEXT: ld 0, 16(1)
@ -3058,12 +3058,12 @@ define <3 x float> @constrained_vector_exp_v3f32() #0 {
; PC64LE-NEXT: addis 3, 2, .LCPI52_3@toc@ha
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: addi 3, 3, .LCPI52_3@toc@l
; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
; PC64LE-NEXT: xxsldwi 34, 0, 0, 3
; PC64LE-NEXT: xscvdpspn 0, 31
; PC64LE-NEXT: xxsldwi 35, 1, 1, 1
; PC64LE-NEXT: vmrglw 2, 2, 3
; PC64LE-NEXT: xxsldwi 35, 1, 1, 3
; PC64LE-NEXT: vmrghw 2, 2, 3
; PC64LE-NEXT: lvx 3, 0, 3
; PC64LE-NEXT: xxsldwi 36, 0, 0, 1
; PC64LE-NEXT: xxsldwi 36, 0, 0, 3
; PC64LE-NEXT: vperm 2, 4, 2, 3
; PC64LE-NEXT: addi 1, 1, 48
; PC64LE-NEXT: ld 0, 16(1)
@ -3094,15 +3094,15 @@ define <3 x float> @constrained_vector_exp_v3f32() #0 {
; PC64LE9-NEXT: bl expf
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: xscvdpspn 0, 1
; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3
; PC64LE9-NEXT: xscvdpspn 0, 30
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3
; PC64LE9-NEXT: xscvdpspn 0, 31
; PC64LE9-NEXT: addis 3, 2, .LCPI52_3@toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI52_3@toc@l
; PC64LE9-NEXT: vmrglw 2, 3, 2
; PC64LE9-NEXT: vmrghw 2, 3, 2
; PC64LE9-NEXT: lxvx 35, 0, 3
; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 36, 0, 0, 3
; PC64LE9-NEXT: vperm 2, 4, 2, 3
; PC64LE9-NEXT: addi 1, 1, 48
; PC64LE9-NEXT: ld 0, 16(1)
@ -3407,12 +3407,12 @@ define <3 x float> @constrained_vector_exp2_v3f32() #0 {
; PC64LE-NEXT: addis 3, 2, .LCPI57_3@toc@ha
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: addi 3, 3, .LCPI57_3@toc@l
; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
; PC64LE-NEXT: xxsldwi 34, 0, 0, 3
; PC64LE-NEXT: xscvdpspn 0, 31
; PC64LE-NEXT: xxsldwi 35, 1, 1, 1
; PC64LE-NEXT: vmrglw 2, 2, 3
; PC64LE-NEXT: xxsldwi 35, 1, 1, 3
; PC64LE-NEXT: vmrghw 2, 2, 3
; PC64LE-NEXT: lvx 3, 0, 3
; PC64LE-NEXT: xxsldwi 36, 0, 0, 1
; PC64LE-NEXT: xxsldwi 36, 0, 0, 3
; PC64LE-NEXT: vperm 2, 4, 2, 3
; PC64LE-NEXT: addi 1, 1, 48
; PC64LE-NEXT: ld 0, 16(1)
@ -3443,15 +3443,15 @@ define <3 x float> @constrained_vector_exp2_v3f32() #0 {
; PC64LE9-NEXT: bl exp2f
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: xscvdpspn 0, 1
; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3
; PC64LE9-NEXT: xscvdpspn 0, 30
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3
; PC64LE9-NEXT: xscvdpspn 0, 31
; PC64LE9-NEXT: addis 3, 2, .LCPI57_3@toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI57_3@toc@l
; PC64LE9-NEXT: vmrglw 2, 3, 2
; PC64LE9-NEXT: vmrghw 2, 3, 2
; PC64LE9-NEXT: lxvx 35, 0, 3
; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 36, 0, 0, 3
; PC64LE9-NEXT: vperm 2, 4, 2, 3
; PC64LE9-NEXT: addi 1, 1, 48
; PC64LE9-NEXT: ld 0, 16(1)
@ -3756,12 +3756,12 @@ define <3 x float> @constrained_vector_log_v3f32() #0 {
; PC64LE-NEXT: addis 3, 2, .LCPI62_3@toc@ha
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: addi 3, 3, .LCPI62_3@toc@l
; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
; PC64LE-NEXT: xxsldwi 34, 0, 0, 3
; PC64LE-NEXT: xscvdpspn 0, 31
; PC64LE-NEXT: xxsldwi 35, 1, 1, 1
; PC64LE-NEXT: vmrglw 2, 2, 3
; PC64LE-NEXT: xxsldwi 35, 1, 1, 3
; PC64LE-NEXT: vmrghw 2, 2, 3
; PC64LE-NEXT: lvx 3, 0, 3
; PC64LE-NEXT: xxsldwi 36, 0, 0, 1
; PC64LE-NEXT: xxsldwi 36, 0, 0, 3
; PC64LE-NEXT: vperm 2, 4, 2, 3
; PC64LE-NEXT: addi 1, 1, 48
; PC64LE-NEXT: ld 0, 16(1)
@ -3792,15 +3792,15 @@ define <3 x float> @constrained_vector_log_v3f32() #0 {
; PC64LE9-NEXT: bl logf
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: xscvdpspn 0, 1
; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3
; PC64LE9-NEXT: xscvdpspn 0, 30
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3
; PC64LE9-NEXT: xscvdpspn 0, 31
; PC64LE9-NEXT: addis 3, 2, .LCPI62_3@toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI62_3@toc@l
; PC64LE9-NEXT: vmrglw 2, 3, 2
; PC64LE9-NEXT: vmrghw 2, 3, 2
; PC64LE9-NEXT: lxvx 35, 0, 3
; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 36, 0, 0, 3
; PC64LE9-NEXT: vperm 2, 4, 2, 3
; PC64LE9-NEXT: addi 1, 1, 48
; PC64LE9-NEXT: ld 0, 16(1)
@ -4105,12 +4105,12 @@ define <3 x float> @constrained_vector_log10_v3f32() #0 {
; PC64LE-NEXT: addis 3, 2, .LCPI67_3@toc@ha
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: addi 3, 3, .LCPI67_3@toc@l
; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
; PC64LE-NEXT: xxsldwi 34, 0, 0, 3
; PC64LE-NEXT: xscvdpspn 0, 31
; PC64LE-NEXT: xxsldwi 35, 1, 1, 1
; PC64LE-NEXT: vmrglw 2, 2, 3
; PC64LE-NEXT: xxsldwi 35, 1, 1, 3
; PC64LE-NEXT: vmrghw 2, 2, 3
; PC64LE-NEXT: lvx 3, 0, 3
; PC64LE-NEXT: xxsldwi 36, 0, 0, 1
; PC64LE-NEXT: xxsldwi 36, 0, 0, 3
; PC64LE-NEXT: vperm 2, 4, 2, 3
; PC64LE-NEXT: addi 1, 1, 48
; PC64LE-NEXT: ld 0, 16(1)
@ -4141,15 +4141,15 @@ define <3 x float> @constrained_vector_log10_v3f32() #0 {
; PC64LE9-NEXT: bl log10f
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: xscvdpspn 0, 1
; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3
; PC64LE9-NEXT: xscvdpspn 0, 30
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3
; PC64LE9-NEXT: xscvdpspn 0, 31
; PC64LE9-NEXT: addis 3, 2, .LCPI67_3@toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI67_3@toc@l
; PC64LE9-NEXT: vmrglw 2, 3, 2
; PC64LE9-NEXT: vmrghw 2, 3, 2
; PC64LE9-NEXT: lxvx 35, 0, 3
; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 36, 0, 0, 3
; PC64LE9-NEXT: vperm 2, 4, 2, 3
; PC64LE9-NEXT: addi 1, 1, 48
; PC64LE9-NEXT: ld 0, 16(1)
@ -4454,12 +4454,12 @@ define <3 x float> @constrained_vector_log2_v3f32() #0 {
; PC64LE-NEXT: addis 3, 2, .LCPI72_3@toc@ha
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: addi 3, 3, .LCPI72_3@toc@l
; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
; PC64LE-NEXT: xxsldwi 34, 0, 0, 3
; PC64LE-NEXT: xscvdpspn 0, 31
; PC64LE-NEXT: xxsldwi 35, 1, 1, 1
; PC64LE-NEXT: vmrglw 2, 2, 3
; PC64LE-NEXT: xxsldwi 35, 1, 1, 3
; PC64LE-NEXT: vmrghw 2, 2, 3
; PC64LE-NEXT: lvx 3, 0, 3
; PC64LE-NEXT: xxsldwi 36, 0, 0, 1
; PC64LE-NEXT: xxsldwi 36, 0, 0, 3
; PC64LE-NEXT: vperm 2, 4, 2, 3
; PC64LE-NEXT: addi 1, 1, 48
; PC64LE-NEXT: ld 0, 16(1)
@ -4490,15 +4490,15 @@ define <3 x float> @constrained_vector_log2_v3f32() #0 {
; PC64LE9-NEXT: bl log2f
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: xscvdpspn 0, 1
; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3
; PC64LE9-NEXT: xscvdpspn 0, 30
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3
; PC64LE9-NEXT: xscvdpspn 0, 31
; PC64LE9-NEXT: addis 3, 2, .LCPI72_3@toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI72_3@toc@l
; PC64LE9-NEXT: vmrglw 2, 3, 2
; PC64LE9-NEXT: vmrghw 2, 3, 2
; PC64LE9-NEXT: lxvx 35, 0, 3
; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 36, 0, 0, 3
; PC64LE9-NEXT: vperm 2, 4, 2, 3
; PC64LE9-NEXT: addi 1, 1, 48
; PC64LE9-NEXT: ld 0, 16(1)
@ -4748,12 +4748,12 @@ define <3 x float> @constrained_vector_rint_v3f32() #0 {
; PC64LE-NEXT: xsrdpic 2, 2
; PC64LE-NEXT: xscvdpspn 0, 0
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
; PC64LE-NEXT: xxsldwi 34, 0, 0, 3
; PC64LE-NEXT: xscvdpspn 0, 2
; PC64LE-NEXT: xxsldwi 35, 1, 1, 1
; PC64LE-NEXT: vmrglw 2, 3, 2
; PC64LE-NEXT: xxsldwi 35, 1, 1, 3
; PC64LE-NEXT: vmrghw 2, 3, 2
; PC64LE-NEXT: lvx 3, 0, 3
; PC64LE-NEXT: xxsldwi 36, 0, 0, 1
; PC64LE-NEXT: xxsldwi 36, 0, 0, 3
; PC64LE-NEXT: vperm 2, 4, 2, 3
; PC64LE-NEXT: blr
;
@ -4773,10 +4773,10 @@ define <3 x float> @constrained_vector_rint_v3f32() #0 {
; PC64LE9-NEXT: xscvdpspn 0, 0
; PC64LE9-NEXT: xscvdpspn 1, 1
; PC64LE9-NEXT: xscvdpspn 2, 2
; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 35, 1, 1, 1
; PC64LE9-NEXT: xxsldwi 34, 2, 2, 1
; PC64LE9-NEXT: vmrglw 2, 3, 2
; PC64LE9-NEXT: xxsldwi 36, 0, 0, 3
; PC64LE9-NEXT: xxsldwi 35, 1, 1, 3
; PC64LE9-NEXT: xxsldwi 34, 2, 2, 3
; PC64LE9-NEXT: vmrghw 2, 3, 2
; PC64LE9-NEXT: lxvx 35, 0, 3
; PC64LE9-NEXT: vperm 2, 4, 2, 3
; PC64LE9-NEXT: blr
@ -4947,12 +4947,12 @@ define <3 x float> @constrained_vector_nearbyint_v3f32() #0 {
; PC64LE-NEXT: addis 3, 2, .LCPI82_3@toc@ha
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: addi 3, 3, .LCPI82_3@toc@l
; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
; PC64LE-NEXT: xxsldwi 34, 0, 0, 3
; PC64LE-NEXT: xscvdpspn 0, 31
; PC64LE-NEXT: xxsldwi 35, 1, 1, 1
; PC64LE-NEXT: vmrglw 2, 2, 3
; PC64LE-NEXT: xxsldwi 35, 1, 1, 3
; PC64LE-NEXT: vmrghw 2, 2, 3
; PC64LE-NEXT: lvx 3, 0, 3
; PC64LE-NEXT: xxsldwi 36, 0, 0, 1
; PC64LE-NEXT: xxsldwi 36, 0, 0, 3
; PC64LE-NEXT: vperm 2, 4, 2, 3
; PC64LE-NEXT: addi 1, 1, 48
; PC64LE-NEXT: ld 0, 16(1)
@ -4983,15 +4983,15 @@ define <3 x float> @constrained_vector_nearbyint_v3f32() #0 {
; PC64LE9-NEXT: bl nearbyintf
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: xscvdpspn 0, 1
; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3
; PC64LE9-NEXT: xscvdpspn 0, 30
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3
; PC64LE9-NEXT: xscvdpspn 0, 31
; PC64LE9-NEXT: addis 3, 2, .LCPI82_3@toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI82_3@toc@l
; PC64LE9-NEXT: vmrglw 2, 3, 2
; PC64LE9-NEXT: vmrghw 2, 3, 2
; PC64LE9-NEXT: lxvx 35, 0, 3
; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 36, 0, 0, 3
; PC64LE9-NEXT: vperm 2, 4, 2, 3
; PC64LE9-NEXT: addi 1, 1, 48
; PC64LE9-NEXT: ld 0, 16(1)
@ -5184,11 +5184,11 @@ define <3 x float> @constrained_vector_maxnum_v3f32() #0 {
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: addi 3, 3, .LCPI87_5@toc@l
; PC64LE-NEXT: lvx 4, 0, 3
; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
; PC64LE-NEXT: xxsldwi 34, 0, 0, 3
; PC64LE-NEXT: xscvdpspn 0, 30
; PC64LE-NEXT: xxsldwi 35, 1, 1, 1
; PC64LE-NEXT: vmrglw 2, 2, 3
; PC64LE-NEXT: xxsldwi 35, 0, 0, 1
; PC64LE-NEXT: xxsldwi 35, 1, 1, 3
; PC64LE-NEXT: vmrghw 2, 2, 3
; PC64LE-NEXT: xxsldwi 35, 0, 0, 3
; PC64LE-NEXT: vperm 2, 3, 2, 4
; PC64LE-NEXT: addi 1, 1, 64
; PC64LE-NEXT: ld 0, 16(1)
@ -5227,15 +5227,15 @@ define <3 x float> @constrained_vector_maxnum_v3f32() #0 {
; PC64LE9-NEXT: bl fmaxf
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: xscvdpspn 0, 1
; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3
; PC64LE9-NEXT: xscvdpspn 0, 29
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3
; PC64LE9-NEXT: xscvdpspn 0, 30
; PC64LE9-NEXT: addis 3, 2, .LCPI87_5@toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI87_5@toc@l
; PC64LE9-NEXT: lxvx 36, 0, 3
; PC64LE9-NEXT: vmrglw 2, 3, 2
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1
; PC64LE9-NEXT: vmrghw 2, 3, 2
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3
; PC64LE9-NEXT: vperm 2, 3, 2, 4
; PC64LE9-NEXT: addi 1, 1, 64
; PC64LE9-NEXT: ld 0, 16(1)
@ -5471,11 +5471,11 @@ define <3 x float> @constrained_vector_minnum_v3f32() #0 {
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: addi 3, 3, .LCPI92_5@toc@l
; PC64LE-NEXT: lvx 4, 0, 3
; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
; PC64LE-NEXT: xxsldwi 34, 0, 0, 3
; PC64LE-NEXT: xscvdpspn 0, 30
; PC64LE-NEXT: xxsldwi 35, 1, 1, 1
; PC64LE-NEXT: vmrglw 2, 2, 3
; PC64LE-NEXT: xxsldwi 35, 0, 0, 1
; PC64LE-NEXT: xxsldwi 35, 1, 1, 3
; PC64LE-NEXT: vmrghw 2, 2, 3
; PC64LE-NEXT: xxsldwi 35, 0, 0, 3
; PC64LE-NEXT: vperm 2, 3, 2, 4
; PC64LE-NEXT: addi 1, 1, 64
; PC64LE-NEXT: ld 0, 16(1)
@ -5514,15 +5514,15 @@ define <3 x float> @constrained_vector_minnum_v3f32() #0 {
; PC64LE9-NEXT: bl fminf
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: xscvdpspn 0, 1
; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3
; PC64LE9-NEXT: xscvdpspn 0, 29
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3
; PC64LE9-NEXT: xscvdpspn 0, 30
; PC64LE9-NEXT: addis 3, 2, .LCPI92_5@toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI92_5@toc@l
; PC64LE9-NEXT: lxvx 36, 0, 3
; PC64LE9-NEXT: vmrglw 2, 3, 2
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1
; PC64LE9-NEXT: vmrghw 2, 3, 2
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3
; PC64LE9-NEXT: vperm 2, 3, 2, 4
; PC64LE9-NEXT: addi 1, 1, 64
; PC64LE9-NEXT: ld 0, 16(1)
@ -5686,9 +5686,9 @@ define <2 x float> @constrained_vector_fptrunc_v2f64() #0 {
; PC64LE-NEXT: xsrsp 1, 1
; PC64LE-NEXT: xscvdpspn 0, 0
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
; PC64LE-NEXT: xxsldwi 35, 1, 1, 1
; PC64LE-NEXT: vmrglw 2, 3, 2
; PC64LE-NEXT: xxsldwi 34, 0, 0, 3
; PC64LE-NEXT: xxsldwi 35, 1, 1, 3
; PC64LE-NEXT: vmrghw 2, 3, 2
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_fptrunc_v2f64:
@ -5698,12 +5698,12 @@ define <2 x float> @constrained_vector_fptrunc_v2f64() #0 {
; PC64LE9-NEXT: addis 3, 2, .LCPI96_1@toc@ha
; PC64LE9-NEXT: xsrsp 0, 0
; PC64LE9-NEXT: xscvdpspn 0, 0
; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3
; PC64LE9-NEXT: lfd 0, .LCPI96_1@toc@l(3)
; PC64LE9-NEXT: xsrsp 0, 0
; PC64LE9-NEXT: xscvdpspn 0, 0
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1
; PC64LE9-NEXT: vmrglw 2, 3, 2
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3
; PC64LE9-NEXT: vmrghw 2, 3, 2
; PC64LE9-NEXT: blr
entry:
%result = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(
@ -5729,12 +5729,12 @@ define <3 x float> @constrained_vector_fptrunc_v3f64() #0 {
; PC64LE-NEXT: xsrsp 2, 2
; PC64LE-NEXT: xscvdpspn 0, 0
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
; PC64LE-NEXT: xxsldwi 34, 0, 0, 3
; PC64LE-NEXT: xscvdpspn 0, 2
; PC64LE-NEXT: xxsldwi 35, 1, 1, 1
; PC64LE-NEXT: vmrglw 2, 3, 2
; PC64LE-NEXT: xxsldwi 35, 1, 1, 3
; PC64LE-NEXT: vmrghw 2, 3, 2
; PC64LE-NEXT: lvx 3, 0, 3
; PC64LE-NEXT: xxsldwi 36, 0, 0, 1
; PC64LE-NEXT: xxsldwi 36, 0, 0, 3
; PC64LE-NEXT: vperm 2, 4, 2, 3
; PC64LE-NEXT: blr
;
@ -5745,20 +5745,20 @@ define <3 x float> @constrained_vector_fptrunc_v3f64() #0 {
; PC64LE9-NEXT: addis 3, 2, .LCPI97_1@toc@ha
; PC64LE9-NEXT: xsrsp 0, 0
; PC64LE9-NEXT: xscvdpspn 0, 0
; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3
; PC64LE9-NEXT: lfd 0, .LCPI97_1@toc@l(3)
; PC64LE9-NEXT: addis 3, 2, .LCPI97_2@toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI97_2@toc@l
; PC64LE9-NEXT: xsrsp 0, 0
; PC64LE9-NEXT: xscvdpspn 0, 0
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1
; PC64LE9-NEXT: vmrglw 2, 3, 2
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3
; PC64LE9-NEXT: vmrghw 2, 3, 2
; PC64LE9-NEXT: lxvx 35, 0, 3
; PC64LE9-NEXT: addis 3, 2, .LCPI97_3@toc@ha
; PC64LE9-NEXT: lfd 0, .LCPI97_3@toc@l(3)
; PC64LE9-NEXT: xsrsp 0, 0
; PC64LE9-NEXT: xscvdpspn 0, 0
; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 36, 0, 0, 3
; PC64LE9-NEXT: vperm 2, 4, 2, 3
; PC64LE9-NEXT: blr
entry:

View File

@ -1404,9 +1404,9 @@ define <2 x float> @test44(<2 x i64> %a) {
; CHECK-LE-NEXT: xscvuxdsp f0, f0
; CHECK-LE-NEXT: xscvdpspn vs1, f1
; CHECK-LE-NEXT: xscvdpspn vs0, f0
; CHECK-LE-NEXT: xxsldwi v3, vs1, vs1, 1
; CHECK-LE-NEXT: xxsldwi v2, vs0, vs0, 1
; CHECK-LE-NEXT: vmrglw v2, v3, v2
; CHECK-LE-NEXT: xxsldwi v3, vs1, vs1, 3
; CHECK-LE-NEXT: xxsldwi v2, vs0, vs0, 3
; CHECK-LE-NEXT: vmrghw v2, v3, v2
; CHECK-LE-NEXT: blr
%v = uitofp <2 x i64> %a to <2 x float>
ret <2 x float> %v
@ -1486,9 +1486,9 @@ define <2 x float> @test45(<2 x i64> %a) {
; CHECK-LE-NEXT: xscvsxdsp f0, f0
; CHECK-LE-NEXT: xscvdpspn vs1, f1
; CHECK-LE-NEXT: xscvdpspn vs0, f0
; CHECK-LE-NEXT: xxsldwi v3, vs1, vs1, 1
; CHECK-LE-NEXT: xxsldwi v2, vs0, vs0, 1
; CHECK-LE-NEXT: vmrglw v2, v3, v2
; CHECK-LE-NEXT: xxsldwi v3, vs1, vs1, 3
; CHECK-LE-NEXT: xxsldwi v2, vs0, vs0, 3
; CHECK-LE-NEXT: vmrghw v2, v3, v2
; CHECK-LE-NEXT: blr
%v = sitofp <2 x i64> %a to <2 x float>
ret <2 x float> %v
@ -2437,12 +2437,11 @@ define <2 x i32> @test80(i32 %v) {
;
; CHECK-LE-LABEL: test80:
; CHECK-LE: # %bb.0:
; CHECK-LE-NEXT: mtfprd f0, r3
; CHECK-LE-NEXT: mtfprwz f0, r3
; CHECK-LE-NEXT: addis r4, r2, .LCPI65_0@toc@ha
; CHECK-LE-NEXT: addi r3, r4, .LCPI65_0@toc@l
; CHECK-LE-NEXT: xxswapd vs0, vs0
; CHECK-LE-NEXT: xxspltw v2, vs0, 1
; CHECK-LE-NEXT: lvx v3, 0, r3
; CHECK-LE-NEXT: xxspltw v2, vs0, 3
; CHECK-LE-NEXT: vadduwm v2, v2, v3
; CHECK-LE-NEXT: blr
%b1 = insertelement <2 x i32> undef, i32 %v, i32 0

View File

@ -17,17 +17,15 @@ define <2 x double> @testi0(<2 x double>* %p1, double* %p2) {
; CHECK-NEXT: lxvd2x vs0, 0, r3
; CHECK-NEXT: lfdx f1, 0, r4
; CHECK-NEXT: xxswapd vs0, vs0
; CHECK-NEXT: xxspltd vs1, vs1, 0
; CHECK-NEXT: xxpermdi v2, vs0, vs1, 1
; CHECK-NEXT: xxmrghd v2, vs0, vs1
; CHECK-NEXT: blr
;
; CHECK-P9-VECTOR-LABEL: testi0:
; CHECK-P9-VECTOR: # %bb.0:
; CHECK-P9-VECTOR-NEXT: lxvd2x vs0, 0, r3
; CHECK-P9-VECTOR-NEXT: lfdx f1, 0, r4
; CHECK-P9-VECTOR-NEXT: xxspltd vs1, vs1, 0
; CHECK-P9-VECTOR-NEXT: xxswapd vs0, vs0
; CHECK-P9-VECTOR-NEXT: xxpermdi v2, vs0, vs1, 1
; CHECK-P9-VECTOR-NEXT: xxmrghd v2, vs0, vs1
; CHECK-P9-VECTOR-NEXT: blr
;
; CHECK-P9-LABEL: testi0:
@ -51,17 +49,15 @@ define <2 x double> @testi1(<2 x double>* %p1, double* %p2) {
; CHECK-NEXT: lxvd2x vs0, 0, r3
; CHECK-NEXT: lfdx f1, 0, r4
; CHECK-NEXT: xxswapd vs0, vs0
; CHECK-NEXT: xxspltd vs1, vs1, 0
; CHECK-NEXT: xxmrgld v2, vs1, vs0
; CHECK-NEXT: xxpermdi v2, vs1, vs0, 1
; CHECK-NEXT: blr
;
; CHECK-P9-VECTOR-LABEL: testi1:
; CHECK-P9-VECTOR: # %bb.0:
; CHECK-P9-VECTOR-NEXT: lxvd2x vs0, 0, r3
; CHECK-P9-VECTOR-NEXT: lfdx f1, 0, r4
; CHECK-P9-VECTOR-NEXT: xxspltd vs1, vs1, 0
; CHECK-P9-VECTOR-NEXT: xxswapd vs0, vs0
; CHECK-P9-VECTOR-NEXT: xxmrgld v2, vs1, vs0
; CHECK-P9-VECTOR-NEXT: xxpermdi v2, vs1, vs0, 1
; CHECK-P9-VECTOR-NEXT: blr
;
; CHECK-P9-LABEL: testi1: