[PowerPC] Exploit xxspltiw and xxspltidp instructions

Exploits the VSX Vector Splat Immediate Word and
VSX Vector Splat Immediate Double Precision instructions:

  xxspltiw XT,IMM32
  xxspltidp XT,IMM32

Differential Revision: https://reviews.llvm.org/D82911
This commit is contained in:
Anil Mahmud 2020-07-01 14:16:27 -05:00 committed by Lei Huang
parent 0670f855a7
commit c5b4f03b53
6 changed files with 539 additions and 28 deletions

View File

@ -1473,6 +1473,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::STFIWX: return "PPCISD::STFIWX"; case PPCISD::STFIWX: return "PPCISD::STFIWX";
case PPCISD::VPERM: return "PPCISD::VPERM"; case PPCISD::VPERM: return "PPCISD::VPERM";
case PPCISD::XXSPLT: return "PPCISD::XXSPLT"; case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
case PPCISD::XXSPLTI_SP_TO_DP:
return "PPCISD::XXSPLTI_SP_TO_DP";
case PPCISD::VECINSERT: return "PPCISD::VECINSERT"; case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI"; case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
case PPCISD::VECSHL: return "PPCISD::VECSHL"; case PPCISD::VECSHL: return "PPCISD::VECSHL";
@ -8966,9 +8968,9 @@ SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
// Vector related lowering. // Vector related lowering.
// //
/// BuildSplatI - Build a canonical splati of Val with an element size of /// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
/// SplatSize. Cast the result to VT. /// element size of SplatSize. Cast the result to VT.
static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT, static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
SelectionDAG &DAG, const SDLoc &dl) { SelectionDAG &DAG, const SDLoc &dl) {
static const MVT VTys[] = { // canonical VT to use for each size. static const MVT VTys[] = { // canonical VT to use for each size.
MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
@ -8976,9 +8978,11 @@ static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1]; EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
// Force vspltis[hw] -1 to vspltisb -1 to canonicalize. // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
if (Val == -1) if (Val == ((1LU << (SplatSize * 8)) - 1)) {
SplatSize = 1; SplatSize = 1;
Val = 0xFF;
}
EVT CanonicalVT = VTys[SplatSize-1]; EVT CanonicalVT = VTys[SplatSize-1];
@ -9113,6 +9117,34 @@ static const SDValue *getNormalLoadInput(const SDValue &Op) {
return ISD::isNormalLoad(LD) ? InputLoad : nullptr; return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
} }
// Try to narrow ArgAPFloat to IEEE single precision. The narrowing is
// accepted only when it is lossless and the narrowed value is not a
// denormal; on success ArgAPFloat is replaced by the single-precision
// value and true is returned, otherwise ArgAPFloat is left untouched.
bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
  bool LosesInfo = true;
  APFloat Narrowed(ArgAPFloat);
  Narrowed.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
                   &LosesInfo);
  if (LosesInfo || Narrowed.isDenormal())
    return false;
  ArgAPFloat = Narrowed;
  return true;
}
// Reinterpret the bits of ArgAPInt as a double and try to narrow that
// value to a non-denormal single precision float (see the APFloat
// overload). On success the 32-bit image of the narrowed float is
// written back into ArgAPInt and true is returned; otherwise ArgAPInt
// is unchanged.
bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
  APFloat DpAPFloat(ArgAPInt.bitsToDouble());
  if (!convertToNonDenormSingle(DpAPFloat))
    return false;
  ArgAPInt = DpAPFloat.bitcastToAPInt();
  return true;
}
// If this is a case we can't handle, return null and let the default // If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, and if it // expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen // selects to a single instruction, return Op. Otherwise, if we can codegen
@ -9232,9 +9264,23 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
APInt APSplatBits, APSplatUndef; APInt APSplatBits, APSplatUndef;
unsigned SplatBitSize; unsigned SplatBitSize;
bool HasAnyUndefs; bool HasAnyUndefs;
if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, bool BVNIsConstantSplat =
HasAnyUndefs, 0, !Subtarget.isLittleEndian()) || BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
SplatBitSize > 32) { HasAnyUndefs, 0, !Subtarget.isLittleEndian());
// If it is a splat of a double, check if we can shrink it to a 32 bit
// non-denormal float which when converted back to double gives us the same
// double. This is to exploit the XXSPLTIDP instruction.
if (BVNIsConstantSplat && Subtarget.hasPrefixInstrs() &&
(SplatBitSize == 64) && (Op->getValueType(0) == MVT::v2f64) &&
convertToNonDenormSingle(APSplatBits)) {
SDValue SplatNode = DAG.getNode(
PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
return DAG.getBitcast(Op.getValueType(), SplatNode);
}
if (!BVNIsConstantSplat || SplatBitSize > 32) {
const SDValue *InputLoad = getNormalLoadInput(Op.getOperand(0)); const SDValue *InputLoad = getNormalLoadInput(Op.getOperand(0));
// Handle load-and-splat patterns as we have instructions that will do this // Handle load-and-splat patterns as we have instructions that will do this
@ -9273,8 +9319,8 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
return SDValue(); return SDValue();
} }
unsigned SplatBits = APSplatBits.getZExtValue(); uint64_t SplatBits = APSplatBits.getZExtValue();
unsigned SplatUndef = APSplatUndef.getZExtValue(); uint64_t SplatUndef = APSplatUndef.getZExtValue();
unsigned SplatSize = SplatBitSize / 8; unsigned SplatSize = SplatBitSize / 8;
// First, handle single instruction cases. // First, handle single instruction cases.
@ -9289,17 +9335,30 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
return Op; return Op;
} }
// We have XXSPLTIB for constant splats one byte wide // We have XXSPLTIW for constant splats four bytes wide.
// FIXME: SplatBits is an unsigned int being cast to an int while passing it // Given vector length is a multiple of 4, 2-byte splats can be replaced
// as an argument to BuildSplatiI. Given SplatSize == 1 it is okay here. // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
// make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
// turned into a 4-byte splat of 0xABABABAB.
if (Subtarget.hasPrefixInstrs() && SplatSize == 2)
return getCanonicalConstSplat((SplatBits |= SplatBits << 16), SplatSize * 2,
Op.getValueType(), DAG, dl);
if (Subtarget.hasPrefixInstrs() && SplatSize == 4)
return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
dl);
// We have XXSPLTIB for constant splats one byte wide.
if (Subtarget.hasP9Vector() && SplatSize == 1) if (Subtarget.hasP9Vector() && SplatSize == 1)
return BuildSplatI(SplatBits, SplatSize, Op.getValueType(), DAG, dl); return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
dl);
// If the sign extended value is in the range [-16,15], use VSPLTI[bhw]. // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >> int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
(32-SplatBitSize)); (32-SplatBitSize));
if (SextVal >= -16 && SextVal <= 15) if (SextVal >= -16 && SextVal <= 15)
return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl); return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
dl);
// Two instruction sequences. // Two instruction sequences.
@ -9330,7 +9389,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// for fneg/fabs. // for fneg/fabs.
if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) { if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
// Make -1 and vspltisw -1: // Make -1 and vspltisw -1:
SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl); SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
// Make the VSLW intrinsic, computing 0x8000_0000. // Make the VSLW intrinsic, computing 0x8000_0000.
SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV, SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
@ -9358,7 +9417,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// vsplti + shl self. // vsplti + shl self.
if (SextVal == (int)((unsigned)i << TypeShiftAmt)) { if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
static const unsigned IIDs[] = { // Intrinsic to use for each size. static const unsigned IIDs[] = { // Intrinsic to use for each size.
Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0, Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
Intrinsic::ppc_altivec_vslw Intrinsic::ppc_altivec_vslw
@ -9369,7 +9428,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// vsplti + srl self. // vsplti + srl self.
if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
static const unsigned IIDs[] = { // Intrinsic to use for each size. static const unsigned IIDs[] = { // Intrinsic to use for each size.
Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0, Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
Intrinsic::ppc_altivec_vsrw Intrinsic::ppc_altivec_vsrw
@ -9380,7 +9439,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// vsplti + sra self. // vsplti + sra self.
if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
static const unsigned IIDs[] = { // Intrinsic to use for each size. static const unsigned IIDs[] = { // Intrinsic to use for each size.
Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0, Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
Intrinsic::ppc_altivec_vsraw Intrinsic::ppc_altivec_vsraw
@ -9392,7 +9451,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// vsplti + rol self. // vsplti + rol self.
if (SextVal == (int)(((unsigned)i << TypeShiftAmt) | if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) { ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
static const unsigned IIDs[] = { // Intrinsic to use for each size. static const unsigned IIDs[] = { // Intrinsic to use for each size.
Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0, Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
Intrinsic::ppc_altivec_vrlw Intrinsic::ppc_altivec_vrlw
@ -9403,19 +9462,19 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// t = vsplti c, result = vsldoi t, t, 1 // t = vsplti c, result = vsldoi t, t, 1
if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) { if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1; unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl); return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
} }
// t = vsplti c, result = vsldoi t, t, 2 // t = vsplti c, result = vsldoi t, t, 2
if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) { if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2; unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl); return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
} }
// t = vsplti c, result = vsldoi t, t, 3 // t = vsplti c, result = vsldoi t, t, 3
if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) { if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3; unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl); return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
} }
@ -10817,9 +10876,9 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
if (Op.getValueType() == MVT::v4i32) { if (Op.getValueType() == MVT::v4i32) {
SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl); SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt. // +16 as shift amt.
SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
SDValue RHSSwap = // = vrlw RHS, 16 SDValue RHSSwap = // = vrlw RHS, 16
BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl); BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
@ -16239,6 +16298,13 @@ bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
return false; return false;
case MVT::f32: case MVT::f32:
case MVT::f64: case MVT::f64:
if (Subtarget.hasPrefixInstrs()) {
// With prefixed instructions, we can materialize anything that can be
// represented with a 32-bit immediate, not just positive zero.
APFloat APFloatOfImm = Imm;
return convertToNonDenormSingle(APFloatOfImm);
}
LLVM_FALLTHROUGH;
case MVT::ppcf128: case MVT::ppcf128:
return Imm.isPosZero(); return Imm.isPosZero();
} }

View File

@ -97,6 +97,11 @@ namespace llvm {
/// ///
XXSPLT, XXSPLT,
/// XXSPLTI_SP_TO_DP - The PPC VSX splat instructions for immediates for
/// converting immediate single precision numbers to double precision
/// vector or scalar.
XXSPLTI_SP_TO_DP,
/// VECINSERT - The PPC vector insert instruction /// VECINSERT - The PPC vector insert instruction
/// ///
VECINSERT, VECINSERT,
@ -1273,6 +1278,9 @@ namespace llvm {
bool isIntS16Immediate(SDNode *N, int16_t &Imm); bool isIntS16Immediate(SDNode *N, int16_t &Imm);
bool isIntS16Immediate(SDValue Op, int16_t &Imm); bool isIntS16Immediate(SDValue Op, int16_t &Imm);
bool convertToNonDenormSingle(APInt &ArgAPInt);
bool convertToNonDenormSingle(APFloat &ArgAPFloat);
} // end namespace llvm } // end namespace llvm
#endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H #endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H

View File

@ -50,6 +50,10 @@ def SDT_PPCVecSplat : SDTypeProfile<1, 2, [ SDTCisVec<0>,
SDTCisVec<1>, SDTCisInt<2> SDTCisVec<1>, SDTCisInt<2>
]>; ]>;
def SDT_PPCSpToDp : SDTypeProfile<1, 1, [ SDTCisVT<0, v2f64>,
SDTCisInt<1>
]>;
def SDT_PPCVecShift : SDTypeProfile<1, 3, [ SDTCisVec<0>, def SDT_PPCVecShift : SDTypeProfile<1, 3, [ SDTCisVec<0>,
SDTCisVec<1>, SDTCisVec<2>, SDTCisPtrTy<3> SDTCisVec<1>, SDTCisVec<2>, SDTCisPtrTy<3>
]>; ]>;
@ -194,6 +198,7 @@ def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>; def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
def PPCxxsplt : SDNode<"PPCISD::XXSPLT", SDT_PPCVecSplat, []>; def PPCxxsplt : SDNode<"PPCISD::XXSPLT", SDT_PPCVecSplat, []>;
def PPCxxspltidp : SDNode<"PPCISD::XXSPLTI_SP_TO_DP", SDT_PPCSpToDp, []>;
def PPCvecinsert : SDNode<"PPCISD::VECINSERT", SDT_PPCVecInsert, []>; def PPCvecinsert : SDNode<"PPCISD::VECINSERT", SDT_PPCVecInsert, []>;
def PPCxxpermdi : SDNode<"PPCISD::XXPERMDI", SDT_PPCxxpermdi, []>; def PPCxxpermdi : SDNode<"PPCISD::XXPERMDI", SDT_PPCxxpermdi, []>;
def PPCvecshl : SDNode<"PPCISD::VECSHL", SDT_PPCVecShift, []>; def PPCvecshl : SDNode<"PPCISD::VECSHL", SDT_PPCVecShift, []>;
@ -326,6 +331,23 @@ def PPCmatpcreladdr : SDNode<"PPCISD::MAT_PCREL_ADDR", SDTIntUnaryOp, []>;
// PowerPC specific transformation functions and pattern fragments. // PowerPC specific transformation functions and pattern fragments.
// //
// A floating point immediate that is not a positive zero and can be converted
// to a single precision floating point non-denormal immediate without loss of
// information.
def nzFPImmAsi32 : PatLeaf<(fpimm), [{
APFloat APFloatOfN = N->getValueAPF();
return convertToNonDenormSingle(APFloatOfN) && !N->isExactlyValue(+0.0);
}]>;
// Convert the floating point immediate into a 32 bit floating point immediate
// and get a i32 with the resulting bits.
def getFPAs32BitInt : SDNodeXForm<fpimm, [{
APFloat APFloatOfN = N->getValueAPF();
convertToNonDenormSingle(APFloatOfN);
return CurDAG->getTargetConstant(APFloatOfN.bitcastToAPInt().getZExtValue(),
SDLoc(N), MVT::i32);
}]>;
def SHL32 : SDNodeXForm<imm, [{ def SHL32 : SDNodeXForm<imm, [{
// Transformation function: 31 - imm // Transformation function: 31 - imm
return getI32Imm(31 - N->getZExtValue(), SDLoc(N)); return getI32Imm(31 - N->getZExtValue(), SDLoc(N));
@ -392,6 +414,7 @@ def immZExt16 : PatLeaf<(imm), [{
def immNonAllOneAnyExt8 : ImmLeaf<i32, [{ def immNonAllOneAnyExt8 : ImmLeaf<i32, [{
return (isInt<8>(Imm) && (Imm != -1)) || (isUInt<8>(Imm) && (Imm != 0xFF)); return (isInt<8>(Imm) && (Imm != -1)) || (isUInt<8>(Imm) && (Imm != 0xFF));
}]>; }]>;
def i32immNonAllOneNonZero : ImmLeaf<i32, [{ return Imm && (Imm != -1); }]>;
def immSExt5NonZero : ImmLeaf<i32, [{ return Imm && isInt<5>(Imm); }]>; def immSExt5NonZero : ImmLeaf<i32, [{ return Imm && isInt<5>(Imm); }]>;
// imm16Shifted* - These match immediates where the low 16-bits are zero. There // imm16Shifted* - These match immediates where the low 16-bits are zero. There

View File

@ -704,7 +704,8 @@ let Predicates = [PrefixInstrs] in {
def XXSPLTIDP : 8RR_DForm_IMM32_XT6<32, 2, (outs vsrc:$XT), def XXSPLTIDP : 8RR_DForm_IMM32_XT6<32, 2, (outs vsrc:$XT),
(ins i32imm:$IMM32), (ins i32imm:$IMM32),
"xxspltidp $XT, $IMM32", IIC_VecGeneral, "xxspltidp $XT, $IMM32", IIC_VecGeneral,
[]>; [(set v2f64:$XT,
(PPCxxspltidp i32:$IMM32))]>;
def XXSPLTI32DX : def XXSPLTI32DX :
8RR_DForm_IMM32_XT6_IX<32, 0, (outs vsrc:$XT), 8RR_DForm_IMM32_XT6_IX<32, 0, (outs vsrc:$XT),
(ins vsrc:$XTi, i1imm:$IX, i32imm:$IMM32), (ins vsrc:$XTi, i1imm:$IX, i32imm:$IMM32),
@ -822,3 +823,17 @@ let Predicates = [IsISA3_1] in {
def : Pat<(v2i64 (int_ppc_vsx_xxgenpcvdm v2i64:$VRB, imm:$IMM)), def : Pat<(v2i64 (int_ppc_vsx_xxgenpcvdm v2i64:$VRB, imm:$IMM)),
(v2i64 (COPY_TO_REGCLASS (XXGENPCVDM $VRB, imm:$IMM), VRRC))>; (v2i64 (COPY_TO_REGCLASS (XXGENPCVDM $VRB, imm:$IMM), VRRC))>;
} }
// Splat-immediate selection patterns for Power10 prefixed instructions.
// AddedComplexity = 400 prioritizes these over the generic constant-pool
// load and Altivec splat patterns for the same inputs.
let AddedComplexity = 400, Predicates = [PrefixInstrs] in {
  // Splat a 32-bit immediate into every word with XXSPLTIW. Zero and
  // all-ones are excluded (i32immNonAllOneNonZero) since those splats are
  // materialized by shorter canonical sequences (xxlxor / xxleqv).
  def : Pat<(v4i32 (build_vector i32immNonAllOneNonZero:$A,
                                 i32immNonAllOneNonZero:$A,
                                 i32immNonAllOneNonZero:$A,
                                 i32immNonAllOneNonZero:$A)),
            (v4i32 (XXSPLTIW imm:$A))>;
  // Materialize a non-zero f32/f64 immediate with XXSPLTIDP when it fits
  // losslessly in a non-denormal single (nzFPImmAsi32); getFPAs32BitInt
  // encodes the 32-bit image of the float into the instruction.
  def : Pat<(f32 nzFPImmAsi32:$A),
            (COPY_TO_REGCLASS (XXSPLTIDP (getFPAs32BitInt fpimm:$A)),
                              VSFRC)>;
  def : Pat<(f64 nzFPImmAsi32:$A),
            (COPY_TO_REGCLASS (XXSPLTIDP (getFPAs32BitInt fpimm:$A)),
                              VSFRC)>;
}

View File

@ -0,0 +1,111 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
; RUN: -ppc-asm-full-reg-names -mcpu=pwr10 < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
; RUN: -ppc-asm-full-reg-names -mcpu=pwr10 < %s | FileCheck %s \
; RUN: --check-prefix=CHECK-NOPCREL
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
; RUN: -mattr=-pcrelative-memops -ppc-asm-full-reg-names -mcpu=pwr10 < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-NOPCREL
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
; RUN: -ppc-asm-full-reg-names -target-abi=elfv2 -mcpu=pwr10 < %s | \
; RUN: FileCheck %s
define dso_local <2 x double> @testDoubleToDoubleFail() local_unnamed_addr {
; CHECK-LABEL: testDoubleToDoubleFail:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: plxv vs34, .LCPI0_0@PCREL(0), 1
; CHECK-NEXT: blr
;
; CHECK-NOPCREL-LABEL: testDoubleToDoubleFail:
; CHECK-NOPCREL: # %bb.0: # %entry
; CHECK-NOPCREL-NEXT: addis r3, r2, .LCPI0_0@toc@ha
; CHECK-NOPCREL-NEXT: addi r3, r3, .LCPI0_0@toc@l
; CHECK-NOPCREL-NEXT: lxvx vs34, 0, r3
; CHECK-NOPCREL-NEXT: blr
entry:
ret <2 x double> <double 3.423300e+02, double 3.423300e+02>
}
define dso_local <2 x double> @testFloatDenormToDouble() local_unnamed_addr {
; CHECK-LABEL: testFloatDenormToDouble:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: plxv vs34, .LCPI1_0@PCREL(0), 1
; CHECK-NEXT: blr
;
; CHECK-NOPCREL-LABEL: testFloatDenormToDouble:
; CHECK-NOPCREL: # %bb.0: # %entry
; CHECK-NOPCREL-NEXT: addis r3, r2, .LCPI1_0@toc@ha
; CHECK-NOPCREL-NEXT: addi r3, r3, .LCPI1_0@toc@l
; CHECK-NOPCREL-NEXT: lxvx vs34, 0, r3
; CHECK-NOPCREL-NEXT: blr
entry:
ret <2 x double> <double 0x380B38FB80000000, double 0x380B38FB80000000>
}
define dso_local <2 x double> @testDoubleToDoubleNaNFail() local_unnamed_addr {
; CHECK-LABEL: testDoubleToDoubleNaNFail:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: plxv vs34, .LCPI2_0@PCREL(0), 1
; CHECK-NEXT: blr
;
; CHECK-NOPCREL-LABEL: testDoubleToDoubleNaNFail:
; CHECK-NOPCREL: # %bb.0: # %entry
; CHECK-NOPCREL-NEXT: addis r3, r2, .LCPI2_0@toc@ha
; CHECK-NOPCREL-NEXT: addi r3, r3, .LCPI2_0@toc@l
; CHECK-NOPCREL-NEXT: lxvx vs34, 0, r3
; CHECK-NOPCREL-NEXT: blr
entry:
ret <2 x double> <double 0xFFFFFFFFFFFFFFF0, double 0xFFFFFFFFFFFFFFF0>
}
define dso_local double @testDoubleNonRepresentableScalar() local_unnamed_addr {
; CHECK-LABEL: testDoubleNonRepresentableScalar:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: plfd f1, .LCPI3_0@PCREL(0), 1
; CHECK-NEXT: blr
;
; CHECK-NOPCREL-LABEL: testDoubleNonRepresentableScalar:
; CHECK-NOPCREL: # %bb.0: # %entry
; CHECK-NOPCREL-NEXT: addis r3, r2, .LCPI3_0@toc@ha
; CHECK-NOPCREL-NEXT: lfd f1, .LCPI3_0@toc@l(r3)
; CHECK-NOPCREL-NEXT: blr
entry:
ret double 3.423300e+02
}
define dso_local float @testFloatDenormScalar() local_unnamed_addr {
; CHECK-LABEL: testFloatDenormScalar:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: plfs f1, .LCPI4_0@PCREL(0), 1
; CHECK-NEXT: blr
;
; CHECK-NOPCREL-LABEL: testFloatDenormScalar:
; CHECK-NOPCREL: # %bb.0: # %entry
; CHECK-NOPCREL-NEXT: addis r3, r2, .LCPI4_0@toc@ha
; CHECK-NOPCREL-NEXT: lfs f1, .LCPI4_0@toc@l(r3)
; CHECK-NOPCREL-NEXT: blr
entry:
ret float 0x380B38FB80000000
}
define dso_local double @testFloatDenormToDoubleScalar() local_unnamed_addr {
; CHECK-LABEL: testFloatDenormToDoubleScalar:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: plfs f1, .LCPI5_0@PCREL(0), 1
; CHECK-NEXT: blr
;
; CHECK-NOPCREL-LABEL: testFloatDenormToDoubleScalar:
; CHECK-NOPCREL: # %bb.0: # %entry
; CHECK-NOPCREL-NEXT: addis r3, r2, .LCPI5_0@toc@ha
; CHECK-NOPCREL-NEXT: lfs f1, .LCPI5_0@toc@l(r3)
; CHECK-NOPCREL-NEXT: blr
entry:
ret double 0x380B38FB80000000
}

View File

@ -0,0 +1,288 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
; RUN: -ppc-asm-full-reg-names -mcpu=pwr10 < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
; RUN: -ppc-asm-full-reg-names -mcpu=pwr10 < %s | FileCheck %s
define dso_local <4 x i32> @testZero() local_unnamed_addr {
; CHECK-LABEL: testZero:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor vs34, vs34, vs34
; CHECK-NEXT: blr
entry:
ret <4 x i32> zeroinitializer
}
define dso_local <4 x float> @testZeroF() local_unnamed_addr {
; CHECK-LABEL: testZeroF:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor vs34, vs34, vs34
; CHECK-NEXT: blr
entry:
ret <4 x float> zeroinitializer
}
define dso_local <4 x i32> @testAllOneS() local_unnamed_addr {
; CHECK-LABEL: testAllOneS:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv vs34, vs34, vs34
; CHECK-NEXT: blr
entry:
ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
}
define dso_local <4 x i32> @test5Bit() local_unnamed_addr {
; CHECK-LABEL: test5Bit:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vspltisw v2, 7
; CHECK-NEXT: blr
entry:
ret <4 x i32> <i32 7, i32 7, i32 7, i32 7>
}
define dso_local <16 x i8> @test1ByteChar() local_unnamed_addr {
; CHECK-LABEL: test1ByteChar:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltib vs34, 7
; CHECK-NEXT: blr
entry:
ret <16 x i8> <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
}
define dso_local <4 x i32> @test1ByteSplatInt() local_unnamed_addr {
; Here the splat of 171 or 0xABABABAB can be done using a byte splat
; of 0xAB using xxspltib while avoiding the use of xxspltiw.
; CHECK-LABEL: test1ByteSplatInt:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltib vs34, 171
; CHECK-NEXT: blr
entry:
ret <4 x i32> <i32 -1414812757, i32 -1414812757, i32 -1414812757, i32 -1414812757>
}
define dso_local <4 x i32> @test5Bit2Ins() local_unnamed_addr {
; Splats within the range [-32,31] can be done using two vsplti[bhw]
; instructions, but we prefer the xxspltiw instruction to them.
; CHECK-LABEL: test5Bit2Ins:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltiw vs34, 16
; CHECK-NEXT: blr
entry:
ret <4 x i32> <i32 16, i32 16, i32 16, i32 16>
}
define dso_local <4 x float> @testFloatNegZero() local_unnamed_addr {
; 0.0f is not the same as -0.0f. We try to splat -0.0f
; CHECK-LABEL: testFloatNegZero:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltiw vs34, -2147483648
; CHECK-NEXT: blr
entry:
ret <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>
}
define dso_local <4 x float> @testFloat() local_unnamed_addr {
; CHECK-LABEL: testFloat:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltiw vs34, 1135323709
; CHECK-NEXT: blr
entry:
ret <4 x float> <float 0x40757547A0000000, float 0x40757547A0000000, float 0x40757547A0000000, float 0x40757547A0000000>
}
define dso_local <4 x float> @testIntToFloat() local_unnamed_addr {
; CHECK-LABEL: testIntToFloat:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltiw vs34, 1135312896
; CHECK-NEXT: blr
entry:
ret <4 x float> <float 3.430000e+02, float 3.430000e+02, float 3.430000e+02, float 3.430000e+02>
}
define dso_local <4 x i32> @testUndefInt() local_unnamed_addr {
; CHECK-LABEL: testUndefInt:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltiw vs34, 18
; CHECK-NEXT: blr
entry:
ret <4 x i32> <i32 18, i32 undef, i32 undef, i32 18>
}
define dso_local <4 x float> @testUndefIntToFloat() local_unnamed_addr {
; CHECK-LABEL: testUndefIntToFloat:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltiw vs34, 1135312896
; CHECK-NEXT: blr
entry:
ret <4 x float> <float 3.430000e+02, float undef, float undef, float 3.430000e+02>
}
define dso_local <2 x i64> @testPseudo8Byte() local_unnamed_addr {
; CHECK-LABEL: testPseudo8Byte:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltiw vs34, -1430532899
; CHECK-NEXT: blr
entry:
ret <2 x i64> <i64 -6144092014192636707, i64 -6144092014192636707>
}
define dso_local <8 x i16> @test2Byte() local_unnamed_addr {
; CHECK-LABEL: test2Byte:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltiw vs34, 1179666
; CHECK-NEXT: blr
entry:
ret <8 x i16> <i16 18, i16 18, i16 18, i16 18, i16 18, i16 18, i16 18, i16 18>
}
define dso_local <8 x i16> @test2ByteUndef() local_unnamed_addr {
; CHECK-LABEL: test2ByteUndef:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltiw vs34, 1179666
; CHECK-NEXT: blr
entry:
ret <8 x i16> <i16 18, i16 undef, i16 18, i16 18, i16 18, i16 undef, i16 18, i16 18>
}
define dso_local <2 x double> @testFloatToDouble() local_unnamed_addr {
; CHECK-LABEL: testFloatToDouble:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltidp vs34, 1135290941
; CHECK-NEXT: blr
entry:
ret <2 x double> <double 0x40756547A0000000, double 0x40756547A0000000>
}
define dso_local <2 x double> @testDoubleLower4ByteZero() local_unnamed_addr {
; The expanded double will have 0 in its low 32 bits. Truncating the value
; returned by APInt::getZExtValue() -- e.g. storing it in an `unsigned`
; instead of a `uint64_t` -- would discard those bits and cause this test
; to fail.
; CHECK-LABEL: testDoubleLower4ByteZero:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltidp vs34, 1093664768
; CHECK-NEXT: blr
entry:
ret <2 x double> <double 1.100000e+01, double 1.100000e+01>
}
define dso_local <2 x double> @testDoubleToDoubleZero() local_unnamed_addr {
; Should be using canonicalized form to splat zero and use shorter instructions
; than xxspltidp.
; CHECK-LABEL: testDoubleToDoubleZero:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor vs34, vs34, vs34
; CHECK-NEXT: blr
entry:
ret <2 x double> zeroinitializer
}
define dso_local <2 x double> @testDoubleToDoubleNegZero() local_unnamed_addr {
; CHECK-LABEL: testDoubleToDoubleNegZero:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltidp vs34, -2147483648
; CHECK-NEXT: blr
entry:
ret <2 x double> <double -0.000000e+00, double -0.000000e+00>
}
define dso_local <2 x double> @testDoubleToDoubleNaN() local_unnamed_addr {
; CHECK-LABEL: testDoubleToDoubleNaN:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltidp vs34, -16
; CHECK-NEXT: blr
entry:
ret <2 x double> <double 0xFFFFFFFE00000000, double 0xFFFFFFFE00000000>
}
define dso_local <2 x double> @testDoubleToDoubleInfinity() local_unnamed_addr {
; CHECK-LABEL: testDoubleToDoubleInfinity:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltidp vs34, 2139095040
; CHECK-NEXT: blr
entry:
ret <2 x double> <double 0x7FF0000000000000, double 0x7FF0000000000000>
}
define dso_local <2 x double> @testFloatToDoubleNaN() local_unnamed_addr {
; CHECK-LABEL: testFloatToDoubleNaN:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltidp vs34, -1
; CHECK-NEXT: blr
entry:
ret <2 x double> <double 0xFFFFFFFFE0000000, double 0xFFFFFFFFE0000000>
}
define dso_local <2 x double> @testFloatToDoubleInfinity() local_unnamed_addr {
; CHECK-LABEL: testFloatToDoubleInfinity:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltidp vs34, 2139095040
; CHECK-NEXT: blr
entry:
ret <2 x double> <double 0x7FF0000000000000, double 0x7FF0000000000000>
}
define dso_local float @testFloatScalar() local_unnamed_addr {
; CHECK-LABEL: testFloatScalar:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltidp vs1, 1135290941
; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; CHECK-NEXT: blr
entry:
ret float 0x40756547A0000000
}
define dso_local float @testFloatZeroScalar() local_unnamed_addr {
; CHECK-LABEL: testFloatZeroScalar:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor f1, f1, f1
; CHECK-NEXT: blr
entry:
ret float 0.000000e+00
}
define dso_local double @testDoubleRepresentableScalar() local_unnamed_addr {
; CHECK-LABEL: testDoubleRepresentableScalar:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltidp vs1, 1135290941
; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; CHECK-NEXT: blr
entry:
ret double 0x40756547A0000000
}
define dso_local double @testDoubleZeroScalar() local_unnamed_addr {
; CHECK-LABEL: testDoubleZeroScalar:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor f1, f1, f1
; CHECK-NEXT: blr
entry:
ret double 0.000000e+00
}