forked from OSchip/llvm-project
[PowerPC] Improvements for BUILD_VECTOR Vol. 1
This patch corresponds to review D25912 (https://reviews.llvm.org/D25912). This is the first patch in a series of 4 that improve the lowering and combining for BUILD_VECTOR nodes on PowerPC. llvm-svn: 288152
This commit is contained in:
parent
9b3ae73fc8
commit
df1cb520df
|
@ -563,10 +563,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
|
|||
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
|
||||
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
|
||||
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
|
||||
if (Subtarget.hasP8Altivec())
|
||||
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
|
||||
if (Subtarget.hasVSX())
|
||||
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
|
||||
|
||||
// Altivec does not contain unordered floating-point compare instructions
|
||||
setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
|
||||
|
@ -676,6 +672,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
|
|||
setOperationAction(ISD::FABS, MVT::v4f32, Legal);
|
||||
setOperationAction(ISD::FABS, MVT::v2f64, Legal);
|
||||
|
||||
if (Subtarget.hasDirectMove())
|
||||
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
|
||||
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
|
||||
|
||||
addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
|
||||
}
|
||||
|
||||
|
@ -688,9 +688,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
|
|||
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
|
||||
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
|
||||
}
|
||||
|
||||
if (Subtarget.isISA3_0() && Subtarget.hasDirectMove())
|
||||
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
|
||||
}
|
||||
|
||||
if (Subtarget.hasQPX()) {
|
||||
|
@ -7129,14 +7126,55 @@ static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
|
|||
return DAG.getNode(ISD::BITCAST, dl, VT, T);
|
||||
}
|
||||
|
||||
static bool isNonConstSplatBV(BuildVectorSDNode *BVN, EVT Type) {
|
||||
if (BVN->isConstant() || BVN->getValueType(0) != Type)
|
||||
/// Do we have an efficient pattern in a .td file for this node?
|
||||
///
|
||||
/// \param V - pointer to the BuildVectorSDNode being matched
|
||||
/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
|
||||
///
|
||||
/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
|
||||
/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
|
||||
/// the opposite is true (expansion is beneficial) are:
|
||||
/// - The node builds a vector out of integers that are not 32 or 64-bits
|
||||
/// - The node builds a vector out of constants
|
||||
/// - The node is a "load-and-splat"
/// - The node has an undef operand
|
||||
/// In all other cases, we will choose to keep the BUILD_VECTOR.
|
||||
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
|
||||
bool HasDirectMove) {
|
||||
EVT VecVT = V->getValueType(0);
|
||||
bool RightType = VecVT == MVT::v2f64 || VecVT == MVT::v4f32 ||
|
||||
(HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
|
||||
if (!RightType)
|
||||
return false;
|
||||
auto OpZero = BVN->getOperand(0);
|
||||
for (int i = 1, e = BVN->getNumOperands(); i < e; i++)
|
||||
if (BVN->getOperand(i) != OpZero)
|
||||
|
||||
bool IsSplat = true;
|
||||
bool IsLoad = false;
|
||||
SDValue Op0 = V->getOperand(0);
|
||||
|
||||
// This function is called in a block that confirms the node is not a constant
|
||||
// splat. So a constant BUILD_VECTOR here means the vector is built out of
|
||||
// different constants.
|
||||
if (V->isConstant())
|
||||
return false;
|
||||
for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
|
||||
if (V->getOperand(i).isUndef())
|
||||
return false;
|
||||
return true;
|
||||
// We want to expand nodes that represent load-and-splat even if the
|
||||
// loaded value is a floating point truncation or conversion to int.
|
||||
if (V->getOperand(i).getOpcode() == ISD::LOAD ||
|
||||
(V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
|
||||
V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
|
||||
(V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
|
||||
V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
|
||||
(V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
|
||||
V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
|
||||
IsLoad = true;
|
||||
// If the operands are different or the input is not a load and has more
|
||||
// uses than just this BV node, then it isn't a splat.
|
||||
if (V->getOperand(i) != Op0 ||
|
||||
(!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
|
||||
IsSplat = false;
|
||||
}
|
||||
return !(IsSplat && IsLoad);
|
||||
}
|
||||
|
||||
// If this is a case we can't handle, return null and let the default
|
||||
|
@ -7261,14 +7299,11 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
|
|||
if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
|
||||
HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
|
||||
SplatBitSize > 32) {
|
||||
// We can splat a non-const value on CPUs that implement ISA 3.0
|
||||
// in two ways: LXVWSX (load and splat) and MTVSRWS(move and splat).
|
||||
auto OpZero = BVN->getOperand(0);
|
||||
bool CanLoadAndSplat = OpZero.getOpcode() == ISD::LOAD &&
|
||||
BVN->isOnlyUserOf(OpZero.getNode());
|
||||
if (Subtarget.isISA3_0() && !CanLoadAndSplat &&
|
||||
(isNonConstSplatBV(BVN, MVT::v4i32) ||
|
||||
isNonConstSplatBV(BVN, MVT::v2i64)))
|
||||
// BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be
|
||||
// lowered to VSX instructions under certain conditions.
|
||||
// Without VSX, there is no pattern more efficient than expanding the node.
|
||||
if (Subtarget.hasVSX() &&
|
||||
haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove()))
|
||||
return Op;
|
||||
return SDValue();
|
||||
}
|
||||
|
@ -7290,8 +7325,20 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
|
|||
}
|
||||
|
||||
// We have XXSPLTIB for constant splats one byte wide
|
||||
if (Subtarget.isISA3_0() && Op.getValueType() == MVT::v16i8)
|
||||
if (Subtarget.hasP9Vector() && SplatSize == 1) {
|
||||
// This is a splat of 1-byte elements with some elements potentially undef.
|
||||
// Rather than trying to match undef in the SDAG patterns, ensure that all
|
||||
// elements are the same constant.
|
||||
if (HasAnyUndefs || ISD::isBuildVectorAllOnes(BVN)) {
|
||||
SmallVector<SDValue, 16> Ops(16, DAG.getConstant(SplatBits,
|
||||
dl, MVT::i32));
|
||||
SDValue NewBV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
|
||||
if (Op.getValueType() != MVT::v16i8)
|
||||
return DAG.getBitcast(Op.getValueType(), NewBV);
|
||||
return NewBV;
|
||||
}
|
||||
return Op;
|
||||
}
|
||||
|
||||
// If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
|
||||
int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
|
||||
|
@ -7539,7 +7586,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
|
|||
|
||||
// If the source for the shuffle is a scalar_to_vector that came from a
|
||||
// 32-bit load, it will have used LXVWSX so we don't need to splat again.
|
||||
if (Subtarget.isISA3_0() &&
|
||||
if (Subtarget.hasP9Vector() &&
|
||||
((isLittleEndian && SplatIdx == 3) ||
|
||||
(!isLittleEndian && SplatIdx == 0))) {
|
||||
SDValue Src = V1.getOperand(0);
|
||||
|
|
|
@ -327,6 +327,7 @@ def immZExt16 : PatLeaf<(imm), [{
|
|||
return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
|
||||
}], LO16>;
|
||||
def immSExt8 : ImmLeaf<i32, [{ return isInt<8>(Imm); }]>;
|
||||
def immSExt5NonZero : ImmLeaf<i32, [{ return Imm && isInt<5>(Imm); }]>;
|
||||
|
||||
// imm16Shifted* - These match immediates where the low 16-bits are zero. There
|
||||
// are two forms: imm16ShiftedSExt and imm16ShiftedZExt. These two forms are
|
||||
|
|
|
@ -570,18 +570,38 @@ let Uses = [RM] in {
|
|||
(outs vsfrc:$XT), (ins vsfrc:$XB),
|
||||
"xscvdpsxds $XT, $XB", IIC_VecFP,
|
||||
[(set f64:$XT, (PPCfctidz f64:$XB))]>;
|
||||
let isCodeGenOnly = 1 in
|
||||
def XSCVDPSXDSs : XX2Form<60, 344,
|
||||
(outs vssrc:$XT), (ins vssrc:$XB),
|
||||
"xscvdpsxds $XT, $XB", IIC_VecFP,
|
||||
[(set f32:$XT, (PPCfctidz f32:$XB))]>;
|
||||
def XSCVDPSXWS : XX2Form<60, 88,
|
||||
(outs vsfrc:$XT), (ins vsfrc:$XB),
|
||||
"xscvdpsxws $XT, $XB", IIC_VecFP,
|
||||
[(set f64:$XT, (PPCfctiwz f64:$XB))]>;
|
||||
let isCodeGenOnly = 1 in
|
||||
def XSCVDPSXWSs : XX2Form<60, 88,
|
||||
(outs vssrc:$XT), (ins vssrc:$XB),
|
||||
"xscvdpsxws $XT, $XB", IIC_VecFP,
|
||||
[(set f32:$XT, (PPCfctiwz f32:$XB))]>;
|
||||
def XSCVDPUXDS : XX2Form<60, 328,
|
||||
(outs vsfrc:$XT), (ins vsfrc:$XB),
|
||||
"xscvdpuxds $XT, $XB", IIC_VecFP,
|
||||
[(set f64:$XT, (PPCfctiduz f64:$XB))]>;
|
||||
let isCodeGenOnly = 1 in
|
||||
def XSCVDPUXDSs : XX2Form<60, 328,
|
||||
(outs vssrc:$XT), (ins vssrc:$XB),
|
||||
"xscvdpuxds $XT, $XB", IIC_VecFP,
|
||||
[(set f32:$XT, (PPCfctiduz f32:$XB))]>;
|
||||
def XSCVDPUXWS : XX2Form<60, 72,
|
||||
(outs vsfrc:$XT), (ins vsfrc:$XB),
|
||||
"xscvdpuxws $XT, $XB", IIC_VecFP,
|
||||
[(set f64:$XT, (PPCfctiwuz f64:$XB))]>;
|
||||
let isCodeGenOnly = 1 in
|
||||
def XSCVDPUXWSs : XX2Form<60, 72,
|
||||
(outs vssrc:$XT), (ins vssrc:$XB),
|
||||
"xscvdpuxws $XT, $XB", IIC_VecFP,
|
||||
[(set f32:$XT, (PPCfctiwuz f32:$XB))]>;
|
||||
def XSCVSPDP : XX2Form<60, 329,
|
||||
(outs vsfrc:$XT), (ins vsfrc:$XB),
|
||||
"xscvspdp $XT, $XB", IIC_VecFP, []>;
|
||||
|
@ -624,13 +644,15 @@ let Uses = [RM] in {
|
|||
"xvcvspsxds $XT, $XB", IIC_VecFP, []>;
|
||||
def XVCVSPSXWS : XX2Form<60, 152,
|
||||
(outs vsrc:$XT), (ins vsrc:$XB),
|
||||
"xvcvspsxws $XT, $XB", IIC_VecFP, []>;
|
||||
"xvcvspsxws $XT, $XB", IIC_VecFP,
|
||||
[(set v4i32:$XT, (fp_to_sint v4f32:$XB))]>;
|
||||
def XVCVSPUXDS : XX2Form<60, 392,
|
||||
(outs vsrc:$XT), (ins vsrc:$XB),
|
||||
"xvcvspuxds $XT, $XB", IIC_VecFP, []>;
|
||||
def XVCVSPUXWS : XX2Form<60, 136,
|
||||
(outs vsrc:$XT), (ins vsrc:$XB),
|
||||
"xvcvspuxws $XT, $XB", IIC_VecFP, []>;
|
||||
"xvcvspuxws $XT, $XB", IIC_VecFP,
|
||||
[(set v4i32:$XT, (fp_to_uint v4f32:$XB))]>;
|
||||
def XVCVSXDDP : XX2Form<60, 504,
|
||||
(outs vsrc:$XT), (ins vsrc:$XB),
|
||||
"xvcvsxddp $XT, $XB", IIC_VecFP,
|
||||
|
@ -661,7 +683,8 @@ let Uses = [RM] in {
|
|||
[(set v2f64:$XT, (int_ppc_vsx_xvcvuxwdp v4i32:$XB))]>;
|
||||
def XVCVUXWSP : XX2Form<60, 168,
|
||||
(outs vsrc:$XT), (ins vsrc:$XB),
|
||||
"xvcvuxwsp $XT, $XB", IIC_VecFP, []>;
|
||||
"xvcvuxwsp $XT, $XB", IIC_VecFP,
|
||||
[(set v4f32:$XT, (uint_to_fp v4i32:$XB))]>;
|
||||
|
||||
// Rounding Instructions
|
||||
def XSRDPI : XX2Form<60, 73,
|
||||
|
@ -1207,6 +1230,8 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
|
|||
|
||||
def : Pat<(f64 (extloadf32 xoaddr:$src)),
|
||||
(COPY_TO_REGCLASS (LXSSPX xoaddr:$src), VSFRC)>;
|
||||
def : Pat<(f32 (fpround (extloadf32 xoaddr:$src))),
|
||||
(f32 (LXSSPX xoaddr:$src))>;
|
||||
def : Pat<(f64 (fpextend f32:$src)),
|
||||
(COPY_TO_REGCLASS $src, VSFRC)>;
|
||||
|
||||
|
@ -1384,7 +1409,7 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
|
|||
} // AddedComplexity = 400
|
||||
} // HasP8Vector
|
||||
|
||||
let UseVSXReg = 1 in {
|
||||
let UseVSXReg = 1, AddedComplexity = 400 in {
|
||||
let Predicates = [HasDirectMove] in {
|
||||
// VSX direct move instructions
|
||||
def MFVSRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsfrc:$XT),
|
||||
|
@ -1730,6 +1755,7 @@ def VectorExtractions {
|
|||
dag BE_VARIABLE_DOUBLE = (COPY_TO_REGCLASS BE_VDOUBLE_PERMUTE, VSRC);
|
||||
}
|
||||
|
||||
let AddedComplexity = 400 in {
|
||||
// v4f32 scalar <-> vector conversions (BE)
|
||||
let Predicates = [IsBigEndian, HasP8Vector] in {
|
||||
def : Pat<(v4f32 (scalar_to_vector f32:$A)),
|
||||
|
@ -1971,15 +1997,16 @@ def : Pat<(f64 (bitconvert i64:$S)),
|
|||
(f64 (MTVSRD $S))>;
|
||||
}
|
||||
|
||||
// Materialize a zero-vector of long long
|
||||
def : Pat<(v2i64 immAllZerosV),
|
||||
(v2i64 (XXLXORz))>;
|
||||
}
|
||||
|
||||
def AlignValues {
|
||||
dag F32_TO_BE_WORD1 = (v4f32 (XXSLDWI (XSCVDPSPN $B), (XSCVDPSPN $B), 3));
|
||||
dag I32_TO_BE_WORD1 = (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC);
|
||||
}
|
||||
|
||||
// Materialize a zero-vector of long long
|
||||
def : Pat<(v2i64 immAllZerosV),
|
||||
(v2i64 (XXLXORz))>;
|
||||
|
||||
// The following VSX instructions were introduced in Power ISA 3.0
|
||||
def HasP9Vector : Predicate<"PPCSubTarget->hasP9Vector()">;
|
||||
let AddedComplexity = 400, Predicates = [HasP9Vector] in {
|
||||
|
@ -2474,23 +2501,8 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
|
|||
(v4i32 (LXVWSX xoaddr:$src))>;
|
||||
def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))),
|
||||
(v4f32 (LXVWSX xoaddr:$src))>;
|
||||
def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
|
||||
(v4i32 (MTVSRWS $A))>;
|
||||
def : Pat<(v16i8 (build_vector immSExt8:$A, immSExt8:$A, immSExt8:$A,
|
||||
immSExt8:$A, immSExt8:$A, immSExt8:$A,
|
||||
immSExt8:$A, immSExt8:$A, immSExt8:$A,
|
||||
immSExt8:$A, immSExt8:$A, immSExt8:$A,
|
||||
immSExt8:$A, immSExt8:$A, immSExt8:$A,
|
||||
immSExt8:$A)),
|
||||
(v16i8 (COPY_TO_REGCLASS (XXSPLTIB imm:$A), VSRC))>;
|
||||
def : Pat<(v16i8 immAllOnesV),
|
||||
(v16i8 (COPY_TO_REGCLASS (XXSPLTIB 255), VSRC))>;
|
||||
def : Pat<(v8i16 immAllOnesV),
|
||||
(v8i16 (COPY_TO_REGCLASS (XXSPLTIB 255), VSRC))>;
|
||||
def : Pat<(v4i32 immAllOnesV),
|
||||
(v4i32 (XXSPLTIB 255))>;
|
||||
def : Pat<(v2i64 immAllOnesV),
|
||||
(v2i64 (XXSPLTIB 255))>;
|
||||
def : Pat<(v4f32 (scalar_to_vector (f32 (fpround (extloadf32 xoaddr:$src))))),
|
||||
(v4f32 (LXVWSX xoaddr:$src))>;
|
||||
|
||||
// Build vectors from i8 loads
|
||||
def : Pat<(v16i8 (scalar_to_vector ScalarLoads.Li8)),
|
||||
|
@ -2631,6 +2643,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
|
|||
(f64 (COPY_TO_REGCLASS (VEXTSB2Ds $A), VSFRC))>;
|
||||
def : Pat<(f64 (PPCVexts f64:$A, 2)),
|
||||
(f64 (COPY_TO_REGCLASS (VEXTSH2Ds $A), VSFRC))>;
|
||||
|
||||
let isPseudo = 1 in {
|
||||
def DFLOADf32 : Pseudo<(outs vssrc:$XT), (ins memrix:$src),
|
||||
"#DFLOADf32",
|
||||
|
@ -2647,18 +2660,260 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
|
|||
}
|
||||
def : Pat<(f64 (extloadf32 iaddr:$src)),
|
||||
(COPY_TO_REGCLASS (DFLOADf32 iaddr:$src), VSFRC)>;
|
||||
def : Pat<(f32 (fpround (extloadf32 iaddr:$src))),
|
||||
(f32 (DFLOADf32 iaddr:$src))>;
|
||||
} // end HasP9Vector, AddedComplexity
|
||||
|
||||
let Predicates = [IsISA3_0, HasDirectMove, IsLittleEndian] in {
|
||||
def : Pat<(v2i64 (build_vector i64:$rA, i64:$rB)),
|
||||
(v2i64 (MTVSRDD $rB, $rA))>;
|
||||
def : Pat<(i64 (extractelt v2i64:$A, 0)),
|
||||
(i64 (MFVSRLD $A))>;
|
||||
// Integer extend helper dags 32 -> 64
|
||||
def AnyExts {
|
||||
dag A = (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32);
|
||||
dag B = (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $B, sub_32);
|
||||
dag C = (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $C, sub_32);
|
||||
dag D = (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $D, sub_32);
|
||||
}
|
||||
|
||||
let Predicates = [IsISA3_0, HasDirectMove, IsBigEndian] in {
|
||||
def : Pat<(v2i64 (build_vector i64:$rB, i64:$rA)),
|
||||
(v2i64 (MTVSRDD $rB, $rA))>;
|
||||
def : Pat<(i64 (extractelt v2i64:$A, 1)),
|
||||
(i64 (MFVSRLD $A))>;
|
||||
def DblToFlt {
|
||||
dag A0 = (f32 (fpround (f64 (extractelt v2f64:$A, 0))));
|
||||
dag A1 = (f32 (fpround (f64 (extractelt v2f64:$A, 1))));
|
||||
dag B0 = (f32 (fpround (f64 (extractelt v2f64:$B, 0))));
|
||||
dag B1 = (f32 (fpround (f64 (extractelt v2f64:$B, 1))));
|
||||
}
|
||||
def FltToIntLoad {
|
||||
dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (extloadf32 xoaddr:$A)))));
|
||||
}
|
||||
def FltToUIntLoad {
|
||||
dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (extloadf32 xoaddr:$A)))));
|
||||
}
|
||||
def FltToLongLoad {
|
||||
dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 xoaddr:$A)))));
|
||||
}
|
||||
def FltToULongLoad {
|
||||
dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 xoaddr:$A)))));
|
||||
}
|
||||
def FltToLong {
|
||||
dag A = (i64 (PPCmfvsr (PPCfctidz (fpextend f32:$A))));
|
||||
}
|
||||
def FltToULong {
|
||||
dag A = (i64 (PPCmfvsr (PPCfctiduz (fpextend f32:$A))));
|
||||
}
|
||||
def DblToInt {
|
||||
dag A = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$A))));
|
||||
}
|
||||
def DblToUInt {
|
||||
dag A = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$A))));
|
||||
}
|
||||
def DblToLong {
|
||||
dag A = (i64 (PPCmfvsr (f64 (PPCfctidz f64:$A))));
|
||||
}
|
||||
def DblToULong {
|
||||
dag A = (i64 (PPCmfvsr (f64 (PPCfctiduz f64:$A))));
|
||||
}
|
||||
def DblToIntLoad {
|
||||
dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load xoaddr:$A)))));
|
||||
}
|
||||
def DblToUIntLoad {
|
||||
dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load xoaddr:$A)))));
|
||||
}
|
||||
def DblToLongLoad {
|
||||
dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (load xoaddr:$A)))));
|
||||
}
|
||||
def DblToULongLoad {
|
||||
dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (load xoaddr:$A)))));
|
||||
}
|
||||
|
||||
// FP merge dags (for f32 -> v4f32)
|
||||
def MrgFP {
|
||||
dag AC = (XVCVDPSP (XXPERMDI (COPY_TO_REGCLASS $A, VSRC),
|
||||
(COPY_TO_REGCLASS $C, VSRC), 0));
|
||||
dag BD = (XVCVDPSP (XXPERMDI (COPY_TO_REGCLASS $B, VSRC),
|
||||
(COPY_TO_REGCLASS $D, VSRC), 0));
|
||||
dag ABhToFlt = (XVCVDPSP (XXPERMDI $A, $B, 0));
|
||||
dag ABlToFlt = (XVCVDPSP (XXPERMDI $A, $B, 3));
|
||||
dag BAhToFlt = (XVCVDPSP (XXPERMDI $B, $A, 0));
|
||||
dag BAlToFlt = (XVCVDPSP (XXPERMDI $B, $A, 3));
|
||||
}
|
||||
|
||||
// Patterns for BUILD_VECTOR nodes.
|
||||
def NoP9Vector : Predicate<"!PPCSubTarget->hasP9Vector()">;
|
||||
let AddedComplexity = 400 in {
|
||||
|
||||
let Predicates = [HasVSX] in {
|
||||
// Build vectors of floating point converted to i32.
|
||||
def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.A,
|
||||
DblToInt.A, DblToInt.A)),
|
||||
(v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWS $A), VSRC), 1))>;
|
||||
def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.A,
|
||||
DblToUInt.A, DblToUInt.A)),
|
||||
(v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWS $A), VSRC), 1))>;
|
||||
def : Pat<(v2i64 (build_vector DblToLong.A, DblToLong.A)),
|
||||
(v2i64 (XXPERMDI (COPY_TO_REGCLASS (XSCVDPSXDS $A), VSRC),
|
||||
(COPY_TO_REGCLASS (XSCVDPSXDS $A), VSRC), 0))>;
|
||||
def : Pat<(v2i64 (build_vector DblToULong.A, DblToULong.A)),
|
||||
(v2i64 (XXPERMDI (COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC),
|
||||
(COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC), 0))>;
|
||||
def : Pat<(v4i32 (scalar_to_vector FltToIntLoad.A)),
|
||||
(v4i32 (XXSPLTW (COPY_TO_REGCLASS
|
||||
(XSCVDPSXWSs (LXSSPX xoaddr:$A)), VSRC), 1))>;
|
||||
def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)),
|
||||
(v4i32 (XXSPLTW (COPY_TO_REGCLASS
|
||||
(XSCVDPUXWSs (LXSSPX xoaddr:$A)), VSRC), 1))>;
|
||||
def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)),
|
||||
(v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>;
|
||||
|
||||
// Build vectors of floating point converted to i64.
|
||||
def : Pat<(v2i64 (build_vector FltToLong.A, FltToLong.A)),
|
||||
(v2i64 (XXPERMDIs (COPY_TO_REGCLASS (XSCVDPSXDSs $A), VSFRC), 0))>;
|
||||
def : Pat<(v2i64 (build_vector FltToULong.A, FltToULong.A)),
|
||||
(v2i64 (XXPERMDIs (COPY_TO_REGCLASS (XSCVDPUXDSs $A), VSFRC), 0))>;
|
||||
def : Pat<(v2i64 (scalar_to_vector DblToLongLoad.A)),
|
||||
(v2i64 (XVCVDPSXDS (LXVDSX xoaddr:$A)))>;
|
||||
def : Pat<(v2i64 (scalar_to_vector DblToULongLoad.A)),
|
||||
(v2i64 (XVCVDPUXDS (LXVDSX xoaddr:$A)))>;
|
||||
}
|
||||
|
||||
let Predicates = [HasVSX, NoP9Vector] in {
|
||||
// Load-and-splat with fp-to-int conversion (using X-Form VSX loads).
|
||||
def : Pat<(v4i32 (scalar_to_vector DblToIntLoad.A)),
|
||||
(v4i32 (XXSPLTW (COPY_TO_REGCLASS
|
||||
(XSCVDPSXWS (LXSDX xoaddr:$A)), VSRC), 1))>;
|
||||
def : Pat<(v4i32 (scalar_to_vector DblToUIntLoad.A)),
|
||||
(v4i32 (XXSPLTW (COPY_TO_REGCLASS
|
||||
(XSCVDPUXWS (LXSDX xoaddr:$A)), VSRC), 1))>;
|
||||
def : Pat<(v2i64 (scalar_to_vector FltToLongLoad.A)),
|
||||
(v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS
|
||||
(LXSSPX xoaddr:$A), VSFRC)), 0))>;
|
||||
def : Pat<(v2i64 (scalar_to_vector FltToULongLoad.A)),
|
||||
(v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS
|
||||
(LXSSPX xoaddr:$A), VSFRC)), 0))>;
|
||||
}
|
||||
|
||||
// Big endian, available on all targets with VSX
|
||||
let Predicates = [IsBigEndian, HasVSX] in {
|
||||
def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
|
||||
(v2f64 (XXPERMDI
|
||||
(COPY_TO_REGCLASS $A, VSRC),
|
||||
(COPY_TO_REGCLASS $B, VSRC), 0))>;
|
||||
|
||||
def : Pat<(v4f32 (build_vector f32:$A, f32:$B, f32:$C, f32:$D)),
|
||||
(VMRGEW MrgFP.AC, MrgFP.BD)>;
|
||||
def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1,
|
||||
DblToFlt.B0, DblToFlt.B1)),
|
||||
(v4f32 (VMRGEW MrgFP.ABhToFlt, MrgFP.ABlToFlt))>;
|
||||
}
|
||||
|
||||
let Predicates = [IsLittleEndian, HasVSX] in {
|
||||
// Little endian, available on all targets with VSX
|
||||
def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
|
||||
(v2f64 (XXPERMDI
|
||||
(COPY_TO_REGCLASS $B, VSRC),
|
||||
(COPY_TO_REGCLASS $A, VSRC), 0))>;
|
||||
|
||||
def : Pat<(v4f32 (build_vector f32:$D, f32:$C, f32:$B, f32:$A)),
|
||||
(VMRGEW MrgFP.AC, MrgFP.BD)>;
|
||||
def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1,
|
||||
DblToFlt.B0, DblToFlt.B1)),
|
||||
(v4f32 (VMRGEW MrgFP.BAhToFlt, MrgFP.BAlToFlt))>;
|
||||
}
|
||||
|
||||
let Predicates = [HasDirectMove] in {
|
||||
// Endianness-neutral constant splat on P8 and newer targets. The reason
|
||||
// for this pattern is that on targets with direct moves, we don't expand
|
||||
// BUILD_VECTOR nodes for v4i32.
|
||||
def : Pat<(v4i32 (build_vector immSExt5NonZero:$A, immSExt5NonZero:$A,
|
||||
immSExt5NonZero:$A, immSExt5NonZero:$A)),
|
||||
(v4i32 (VSPLTISW imm:$A))>;
|
||||
}
|
||||
|
||||
let Predicates = [IsBigEndian, HasDirectMove, NoP9Vector] in {
|
||||
// Big endian integer vectors using direct moves.
|
||||
def : Pat<(v2i64 (build_vector i64:$A, i64:$B)),
|
||||
(v2i64 (XXPERMDI
|
||||
(COPY_TO_REGCLASS (MTVSRD $A), VSRC),
|
||||
(COPY_TO_REGCLASS (MTVSRD $B), VSRC), 0))>;
|
||||
def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
|
||||
(VMRGOW (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC),
|
||||
(COPY_TO_REGCLASS (MTVSRWZ $C), VSRC), 0),
|
||||
(XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC),
|
||||
(COPY_TO_REGCLASS (MTVSRWZ $D), VSRC), 0))>;
|
||||
def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
|
||||
(XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>;
|
||||
}
|
||||
|
||||
let Predicates = [IsLittleEndian, HasDirectMove, NoP9Vector] in {
|
||||
// Little endian integer vectors using direct moves.
|
||||
def : Pat<(v2i64 (build_vector i64:$A, i64:$B)),
|
||||
(v2i64 (XXPERMDI
|
||||
(COPY_TO_REGCLASS (MTVSRD $B), VSRC),
|
||||
(COPY_TO_REGCLASS (MTVSRD $A), VSRC), 0))>;
|
||||
def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
|
||||
(VMRGOW (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $D), VSRC),
|
||||
(COPY_TO_REGCLASS (MTVSRWZ $B), VSRC), 0),
|
||||
(XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $C), VSRC),
|
||||
(COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 0))>;
|
||||
def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
|
||||
(XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>;
|
||||
}
|
||||
|
||||
let Predicates = [HasP9Vector] in {
|
||||
// Endianness-neutral patterns for splats (constant, register, and
// load-and-splat with conversion) with ISA 3.0 instructions.
|
||||
def : Pat<(v4i32 (scalar_to_vector i32:$A)),
|
||||
(v4i32 (MTVSRWS $A))>;
|
||||
def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
|
||||
(v4i32 (MTVSRWS $A))>;
|
||||
def : Pat<(v16i8 (build_vector immSExt8:$A, immSExt8:$A, immSExt8:$A,
|
||||
immSExt8:$A, immSExt8:$A, immSExt8:$A,
|
||||
immSExt8:$A, immSExt8:$A, immSExt8:$A,
|
||||
immSExt8:$A, immSExt8:$A, immSExt8:$A,
|
||||
immSExt8:$A, immSExt8:$A, immSExt8:$A,
|
||||
immSExt8:$A)),
|
||||
(v16i8 (COPY_TO_REGCLASS (XXSPLTIB imm:$A), VSRC))>;
|
||||
def : Pat<(v16i8 immAllOnesV),
|
||||
(v16i8 (COPY_TO_REGCLASS (XXSPLTIB 255), VSRC))>;
|
||||
def : Pat<(v8i16 immAllOnesV),
|
||||
(v8i16 (COPY_TO_REGCLASS (XXSPLTIB 255), VSRC))>;
|
||||
def : Pat<(v4i32 immAllOnesV),
|
||||
(v4i32 (XXSPLTIB 255))>;
|
||||
def : Pat<(v2i64 immAllOnesV),
|
||||
(v2i64 (XXSPLTIB 255))>;
|
||||
def : Pat<(v4i32 (scalar_to_vector FltToIntLoad.A)),
|
||||
(v4i32 (XVCVSPSXWS (LXVWSX xoaddr:$A)))>;
|
||||
def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)),
|
||||
(v4i32 (XVCVSPUXWS (LXVWSX xoaddr:$A)))>;
|
||||
def : Pat<(v4i32 (scalar_to_vector DblToIntLoad.A)),
|
||||
(v4i32 (XXSPLTW (COPY_TO_REGCLASS
|
||||
(XSCVDPSXWS (DFLOADf64 iaddr:$A)), VSRC), 1))>;
|
||||
def : Pat<(v4i32 (scalar_to_vector DblToUIntLoad.A)),
|
||||
(v4i32 (XXSPLTW (COPY_TO_REGCLASS
|
||||
(XSCVDPUXWS (DFLOADf64 iaddr:$A)), VSRC), 1))>;
|
||||
def : Pat<(v2i64 (scalar_to_vector FltToLongLoad.A)),
|
||||
(v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS
|
||||
(DFLOADf32 iaddr:$A),
|
||||
VSFRC)), 0))>;
|
||||
def : Pat<(v2i64 (scalar_to_vector FltToULongLoad.A)),
|
||||
(v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS
|
||||
(DFLOADf32 iaddr:$A),
|
||||
VSFRC)), 0))>;
|
||||
}
|
||||
|
||||
let Predicates = [IsISA3_0, HasDirectMove, IsBigEndian] in {
|
||||
def : Pat<(i64 (extractelt v2i64:$A, 1)),
|
||||
(i64 (MFVSRLD $A))>;
|
||||
// Better way to build integer vectors if we have MTVSRDD. Big endian.
|
||||
def : Pat<(v2i64 (build_vector i64:$rB, i64:$rA)),
|
||||
(v2i64 (MTVSRDD $rB, $rA))>;
|
||||
def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
|
||||
(VMRGOW (COPY_TO_REGCLASS (MTVSRDD AnyExts.A, AnyExts.C), VSRC),
|
||||
(COPY_TO_REGCLASS (MTVSRDD AnyExts.B, AnyExts.D), VSRC))>;
|
||||
}
|
||||
|
||||
let Predicates = [IsISA3_0, HasDirectMove, IsLittleEndian] in {
|
||||
def : Pat<(i64 (extractelt v2i64:$A, 0)),
|
||||
(i64 (MFVSRLD $A))>;
|
||||
// Better way to build integer vectors if we have MTVSRDD. Little endian.
|
||||
def : Pat<(v2i64 (build_vector i64:$rA, i64:$rB)),
|
||||
(v2i64 (MTVSRDD $rB, $rA))>;
|
||||
def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
|
||||
(VMRGOW (COPY_TO_REGCLASS (MTVSRDD AnyExts.D, AnyExts.B), VSRC),
|
||||
(COPY_TO_REGCLASS (MTVSRDD AnyExts.C, AnyExts.A), VSRC))>;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -46,10 +46,10 @@ entry:
|
|||
%splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
|
||||
%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
ret <4 x i32> %splat.splat
|
||||
; CHECK: sldi [[REG1:[0-9]+]], 3, 32
|
||||
; CHECK: mtvsrd {{[0-9]+}}, [[REG1]]
|
||||
; CHECK-LE: mtvsrd [[REG1:[0-9]+]], 3
|
||||
; CHECK-LE: xxswapd {{[0-9]+}}, [[REG1]]
|
||||
; CHECK: mtvsrwz [[REG1:[0-9]+]], 3
|
||||
; CHECK: xxspltw 34, [[REG1]]
|
||||
; CHECK-LE: mtvsrwz [[REG1:[0-9]+]], 3
|
||||
; CHECK-LE: xxspltw 34, [[REG1]]
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind
|
||||
|
|
|
@ -10,15 +10,9 @@ entry:
|
|||
; The FIXME below is due to the lowering for BUILD_VECTOR needing a re-vamp
|
||||
; which will happen in a subsequent patch.
|
||||
; CHECK-LABEL: test1
|
||||
; FIXME: mtvsrdd 34, 4, 3
|
||||
; CHECK: mtvsrd {{[0-9]+}}, 3
|
||||
; CHECK: mtvsrd {{[0-9]+}}, 4
|
||||
; CHECK: xxmrgld
|
||||
; CHECK: mtvsrdd 34, 4, 3
|
||||
; CHECK-BE-LABEL: test1
|
||||
; FIXME-BE: mtvsrdd 34, 3, 4
|
||||
; CHECK-BE: mtvsrd {{[0-9]+}}, 4
|
||||
; CHECK-BE: mtvsrd {{[0-9]+}}, 3
|
||||
; CHECK-BE: xxmrghd
|
||||
; CHECK-BE: mtvsrdd 34, 3, 4
|
||||
%vecins = insertelement <2 x i64> undef, i64 %a, i32 0
|
||||
%vecins1 = insertelement <2 x i64> %vecins, i64 %b, i32 1
|
||||
ret <2 x i64> %vecins1
|
||||
|
@ -162,10 +156,14 @@ define <4 x i32> @test14(<4 x i32> %a, i32* nocapture readonly %b) {
|
|||
entry:
|
||||
; CHECK-LABEL: test14
|
||||
; CHECK: lwz [[LD:[0-9]+]],
|
||||
; CHECK: mtvsrws 34, [[LD]]
|
||||
; FIXME: mtvsrws 34, [[LD]]
|
||||
; CHECK: mtvsrws [[SPLT:[0-9]+]], [[LD]]
|
||||
; CHECK: xxspltw 34, [[SPLT]], 3
|
||||
; CHECK-BE-LABEL: test14
|
||||
; CHECK-BE: lwz [[LD:[0-9]+]],
|
||||
; CHECK-BE: mtvsrws 34, [[LD]]
|
||||
; FIXME: mtvsrws 34, [[LD]]
|
||||
; CHECK-BE: mtvsrws [[SPLT:[0-9]+]], [[LD]]
|
||||
; CHECK-BE: xxspltw 34, [[SPLT]], 0
|
||||
%0 = load i32, i32* %b, align 4
|
||||
%splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
|
||||
%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
|
|
|
@ -1096,9 +1096,7 @@ define <2 x double> @test69(<2 x i16> %a) {
|
|||
; CHECK-LE: mtvsrwa
|
||||
; CHECK-LE: xscvsxddp
|
||||
; CHECK-LE: xscvsxddp
|
||||
; CHECK-LE: xxspltd
|
||||
; CHECK-LE: xxspltd
|
||||
; CHECK-LE: xxmrgld
|
||||
; CHECK-LE: xxmrghd
|
||||
; CHECK-LE: blr
|
||||
}
|
||||
|
||||
|
@ -1121,9 +1119,7 @@ define <2 x double> @test70(<2 x i8> %a) {
|
|||
; CHECK-LE: mtvsrwa
|
||||
; CHECK-LE: xscvsxddp
|
||||
; CHECK-LE: xscvsxddp
|
||||
; CHECK-LE: xxspltd
|
||||
; CHECK-LE: xxspltd
|
||||
; CHECK-LE: xxmrgld
|
||||
; CHECK-LE: xxmrghd
|
||||
; CHECK-LE: blr
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue