forked from OSchip/llvm-project
[PowerPC] Exploit xxspltiw and xxspltidp instructions
Exploit the VSX Vector Splat Immediate Word (xxspltiw XT,IMM32) and VSX Vector Splat Immediate Double Precision (xxspltidp XT,IMM32) instructions. Differential Revision: https://reviews.llvm.org/D82911
This commit is contained in:
parent
0670f855a7
commit
c5b4f03b53
|
@ -1473,6 +1473,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
case PPCISD::STFIWX: return "PPCISD::STFIWX";
|
||||
case PPCISD::VPERM: return "PPCISD::VPERM";
|
||||
case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
|
||||
case PPCISD::XXSPLTI_SP_TO_DP:
|
||||
return "PPCISD::XXSPLTI_SP_TO_DP";
|
||||
case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
|
||||
case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
|
||||
case PPCISD::VECSHL: return "PPCISD::VECSHL";
|
||||
|
@ -8966,9 +8968,9 @@ SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
|
|||
// Vector related lowering.
|
||||
//
|
||||
|
||||
/// BuildSplatI - Build a canonical splati of Val with an element size of
|
||||
/// SplatSize. Cast the result to VT.
|
||||
static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
|
||||
/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
|
||||
/// element size of SplatSize. Cast the result to VT.
|
||||
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
|
||||
SelectionDAG &DAG, const SDLoc &dl) {
|
||||
static const MVT VTys[] = { // canonical VT to use for each size.
|
||||
MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
|
||||
|
@ -8976,9 +8978,11 @@ static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
|
|||
|
||||
EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
|
||||
|
||||
// Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
|
||||
if (Val == -1)
|
||||
// For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
|
||||
if (Val == ((1LU << (SplatSize * 8)) - 1)) {
|
||||
SplatSize = 1;
|
||||
Val = 0xFF;
|
||||
}
|
||||
|
||||
EVT CanonicalVT = VTys[SplatSize-1];
|
||||
|
||||
|
@ -9113,6 +9117,34 @@ static const SDValue *getNormalLoadInput(const SDValue &Op) {
|
|||
return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
|
||||
}
|
||||
|
||||
// Convert the argument APFloat to a single precision APFloat if there is no
|
||||
// loss in information during the conversion to single precision APFloat and the
|
||||
// resulting number is not a denormal number. Return true if successful.
|
||||
bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
|
||||
APFloat APFloatToConvert = ArgAPFloat;
|
||||
bool LosesInfo = true;
|
||||
APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
|
||||
&LosesInfo);
|
||||
bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
|
||||
if (Success)
|
||||
ArgAPFloat = APFloatToConvert;
|
||||
return Success;
|
||||
}
|
||||
|
||||
// Bitcast the argument APInt to a double and convert it to a single precision
|
||||
// APFloat, bitcast the APFloat to an APInt and assign it to the original
|
||||
// argument if there is no loss in information during the conversion from
|
||||
// double to single precision APFloat and the resulting number is not a denormal
|
||||
// number. Return true if successful.
|
||||
bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
|
||||
double DpValue = ArgAPInt.bitsToDouble();
|
||||
APFloat APFloatDp(DpValue);
|
||||
bool Success = convertToNonDenormSingle(APFloatDp);
|
||||
if (Success)
|
||||
ArgAPInt = APFloatDp.bitcastToAPInt();
|
||||
return Success;
|
||||
}
|
||||
|
||||
// If this is a case we can't handle, return null and let the default
|
||||
// expansion code take care of it. If we CAN select this case, and if it
|
||||
// selects to a single instruction, return Op. Otherwise, if we can codegen
|
||||
|
@ -9232,9 +9264,23 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
|
|||
APInt APSplatBits, APSplatUndef;
|
||||
unsigned SplatBitSize;
|
||||
bool HasAnyUndefs;
|
||||
if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
|
||||
HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
|
||||
SplatBitSize > 32) {
|
||||
bool BVNIsConstantSplat =
|
||||
BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
|
||||
HasAnyUndefs, 0, !Subtarget.isLittleEndian());
|
||||
|
||||
// If it is a splat of a double, check if we can shrink it to a 32 bit
|
||||
// non-denormal float which when converted back to double gives us the same
|
||||
// double. This is to exploit the XXSPLTIDP instruction.
|
||||
if (BVNIsConstantSplat && Subtarget.hasPrefixInstrs() &&
|
||||
(SplatBitSize == 64) && (Op->getValueType(0) == MVT::v2f64) &&
|
||||
convertToNonDenormSingle(APSplatBits)) {
|
||||
SDValue SplatNode = DAG.getNode(
|
||||
PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
|
||||
DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
|
||||
return DAG.getBitcast(Op.getValueType(), SplatNode);
|
||||
}
|
||||
|
||||
if (!BVNIsConstantSplat || SplatBitSize > 32) {
|
||||
|
||||
const SDValue *InputLoad = getNormalLoadInput(Op.getOperand(0));
|
||||
// Handle load-and-splat patterns as we have instructions that will do this
|
||||
|
@ -9273,8 +9319,8 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
|
|||
return SDValue();
|
||||
}
|
||||
|
||||
unsigned SplatBits = APSplatBits.getZExtValue();
|
||||
unsigned SplatUndef = APSplatUndef.getZExtValue();
|
||||
uint64_t SplatBits = APSplatBits.getZExtValue();
|
||||
uint64_t SplatUndef = APSplatUndef.getZExtValue();
|
||||
unsigned SplatSize = SplatBitSize / 8;
|
||||
|
||||
// First, handle single instruction cases.
|
||||
|
@ -9289,17 +9335,30 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
|
|||
return Op;
|
||||
}
|
||||
|
||||
// We have XXSPLTIB for constant splats one byte wide
|
||||
// FIXME: SplatBits is an unsigned int being cast to an int while passing it
|
||||
// as an argument to BuildSplatiI. Given SplatSize == 1 it is okay here.
|
||||
// We have XXSPLTIW for constant splats four bytes wide.
|
||||
// Given vector length is a multiple of 4, 2-byte splats can be replaced
|
||||
// with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
|
||||
// make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
|
||||
// turned into a 4-byte splat of 0xABABABAB.
|
||||
if (Subtarget.hasPrefixInstrs() && SplatSize == 2)
|
||||
return getCanonicalConstSplat((SplatBits |= SplatBits << 16), SplatSize * 2,
|
||||
Op.getValueType(), DAG, dl);
|
||||
|
||||
if (Subtarget.hasPrefixInstrs() && SplatSize == 4)
|
||||
return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
|
||||
dl);
|
||||
|
||||
// We have XXSPLTIB for constant splats one byte wide.
|
||||
if (Subtarget.hasP9Vector() && SplatSize == 1)
|
||||
return BuildSplatI(SplatBits, SplatSize, Op.getValueType(), DAG, dl);
|
||||
return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
|
||||
dl);
|
||||
|
||||
// If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
|
||||
int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
|
||||
(32-SplatBitSize));
|
||||
if (SextVal >= -16 && SextVal <= 15)
|
||||
return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
|
||||
return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
|
||||
dl);
|
||||
|
||||
// Two instruction sequences.
|
||||
|
||||
|
@ -9330,7 +9389,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
|
|||
// for fneg/fabs.
|
||||
if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
|
||||
// Make -1 and vspltisw -1:
|
||||
SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);
|
||||
SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
|
||||
|
||||
// Make the VSLW intrinsic, computing 0x8000_0000.
|
||||
SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
|
||||
|
@ -9358,7 +9417,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
|
|||
|
||||
// vsplti + shl self.
|
||||
if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
|
||||
SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
|
||||
SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
|
||||
static const unsigned IIDs[] = { // Intrinsic to use for each size.
|
||||
Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
|
||||
Intrinsic::ppc_altivec_vslw
|
||||
|
@ -9369,7 +9428,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
|
|||
|
||||
// vsplti + srl self.
|
||||
if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
|
||||
SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
|
||||
SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
|
||||
static const unsigned IIDs[] = { // Intrinsic to use for each size.
|
||||
Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
|
||||
Intrinsic::ppc_altivec_vsrw
|
||||
|
@ -9380,7 +9439,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
|
|||
|
||||
// vsplti + sra self.
|
||||
if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
|
||||
SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
|
||||
SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
|
||||
static const unsigned IIDs[] = { // Intrinsic to use for each size.
|
||||
Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
|
||||
Intrinsic::ppc_altivec_vsraw
|
||||
|
@ -9392,7 +9451,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
|
|||
// vsplti + rol self.
|
||||
if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
|
||||
((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
|
||||
SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
|
||||
SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
|
||||
static const unsigned IIDs[] = { // Intrinsic to use for each size.
|
||||
Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
|
||||
Intrinsic::ppc_altivec_vrlw
|
||||
|
@ -9403,19 +9462,19 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
|
|||
|
||||
// t = vsplti c, result = vsldoi t, t, 1
|
||||
if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
|
||||
SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
|
||||
SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
|
||||
unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
|
||||
return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
|
||||
}
|
||||
// t = vsplti c, result = vsldoi t, t, 2
|
||||
if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
|
||||
SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
|
||||
SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
|
||||
unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
|
||||
return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
|
||||
}
|
||||
// t = vsplti c, result = vsldoi t, t, 3
|
||||
if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
|
||||
SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
|
||||
SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
|
||||
unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
|
||||
return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
|
||||
}
|
||||
|
@ -10817,9 +10876,9 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
|
|||
if (Op.getValueType() == MVT::v4i32) {
|
||||
SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
|
||||
|
||||
SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl);
|
||||
SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt.
|
||||
|
||||
SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
|
||||
// +16 as shift amt.
|
||||
SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
|
||||
SDValue RHSSwap = // = vrlw RHS, 16
|
||||
BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
|
||||
|
||||
|
@ -16239,6 +16298,13 @@ bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
|
|||
return false;
|
||||
case MVT::f32:
|
||||
case MVT::f64:
|
||||
if (Subtarget.hasPrefixInstrs()) {
|
||||
// With prefixed instructions, we can materialize anything that can be
|
||||
// represented with a 32-bit immediate, not just positive zero.
|
||||
APFloat APFloatOfImm = Imm;
|
||||
return convertToNonDenormSingle(APFloatOfImm);
|
||||
}
|
||||
LLVM_FALLTHROUGH;
|
||||
case MVT::ppcf128:
|
||||
return Imm.isPosZero();
|
||||
}
|
||||
|
|
|
@ -97,6 +97,11 @@ namespace llvm {
|
|||
///
|
||||
XXSPLT,
|
||||
|
||||
/// XXSPLTI_SP_TO_DP - The PPC VSX splat-immediate instruction that takes a
|
||||
/// single precision immediate and converts it to double precision, for a
|
||||
/// vector or scalar.
|
||||
XXSPLTI_SP_TO_DP,
|
||||
|
||||
/// VECINSERT - The PPC vector insert instruction
|
||||
///
|
||||
VECINSERT,
|
||||
|
@ -1273,6 +1278,9 @@ namespace llvm {
|
|||
bool isIntS16Immediate(SDNode *N, int16_t &Imm);
|
||||
bool isIntS16Immediate(SDValue Op, int16_t &Imm);
|
||||
|
||||
bool convertToNonDenormSingle(APInt &ArgAPInt);
|
||||
bool convertToNonDenormSingle(APFloat &ArgAPFloat);
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
#endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
|
||||
|
|
|
@ -50,6 +50,10 @@ def SDT_PPCVecSplat : SDTypeProfile<1, 2, [ SDTCisVec<0>,
|
|||
SDTCisVec<1>, SDTCisInt<2>
|
||||
]>;
|
||||
|
||||
// Type profile with one result, constrained to v2f64, and one operand,
// constrained to be an integer.
def SDT_PPCSpToDp : SDTypeProfile<1, 1, [SDTCisVT<0, v2f64>, SDTCisInt<1>]>;
|
||||
|
||||
def SDT_PPCVecShift : SDTypeProfile<1, 3, [ SDTCisVec<0>,
|
||||
SDTCisVec<1>, SDTCisVec<2>, SDTCisPtrTy<3>
|
||||
]>;
|
||||
|
@ -194,6 +198,7 @@ def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
|
|||
|
||||
def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
|
||||
def PPCxxsplt : SDNode<"PPCISD::XXSPLT", SDT_PPCVecSplat, []>;
|
||||
def PPCxxspltidp : SDNode<"PPCISD::XXSPLTI_SP_TO_DP", SDT_PPCSpToDp, []>;
|
||||
def PPCvecinsert : SDNode<"PPCISD::VECINSERT", SDT_PPCVecInsert, []>;
|
||||
def PPCxxpermdi : SDNode<"PPCISD::XXPERMDI", SDT_PPCxxpermdi, []>;
|
||||
def PPCvecshl : SDNode<"PPCISD::VECSHL", SDT_PPCVecShift, []>;
|
||||
|
@ -326,6 +331,23 @@ def PPCmatpcreladdr : SDNode<"PPCISD::MAT_PCREL_ADDR", SDTIntUnaryOp, []>;
|
|||
// PowerPC specific transformation functions and pattern fragments.
|
||||
//
|
||||
|
||||
// Matches a floating point immediate that is not positive zero and that
// convertToNonDenormSingle accepts, i.e. it can be turned into a non-denormal
// single precision immediate without loss of information.
def nzFPImmAsi32 : PatLeaf<(fpimm), [{
  if (N->isExactlyValue(+0.0))
    return false;
  // Only the success flag matters here; the converted copy is discarded.
  APFloat SingleCandidate = N->getValueAPF();
  return convertToNonDenormSingle(SingleCandidate);
}]>;
|
||||
|
||||
// Narrow the floating point immediate to single precision and return its bit
// pattern as an i32 target constant.
// The result of convertToNonDenormSingle is not checked here, so the value is
// left unconverted if the narrowing is rejected.
def getFPAs32BitInt : SDNodeXForm<fpimm, [{
  APFloat SingleVal = N->getValueAPF();
  convertToNonDenormSingle(SingleVal);
  uint64_t Bits = SingleVal.bitcastToAPInt().getZExtValue();
  return CurDAG->getTargetConstant(Bits, SDLoc(N), MVT::i32);
}]>;
|
||||
|
||||
def SHL32 : SDNodeXForm<imm, [{
|
||||
// Transformation function: 31 - imm
|
||||
return getI32Imm(31 - N->getZExtValue(), SDLoc(N));
|
||||
|
@ -392,6 +414,7 @@ def immZExt16 : PatLeaf<(imm), [{
|
|||
def immNonAllOneAnyExt8 : ImmLeaf<i32, [{
|
||||
return (isInt<8>(Imm) && (Imm != -1)) || (isUInt<8>(Imm) && (Imm != 0xFF));
|
||||
}]>;
|
||||
// Matches an i32 immediate that is neither zero nor -1 (all ones).
def i32immNonAllOneNonZero : ImmLeaf<i32, [{
  return Imm != 0 && Imm != -1;
}]>;
|
||||
def immSExt5NonZero : ImmLeaf<i32, [{ return Imm && isInt<5>(Imm); }]>;
|
||||
|
||||
// imm16Shifted* - These match immediates where the low 16-bits are zero. There
|
||||
|
|
|
@ -704,7 +704,8 @@ let Predicates = [PrefixInstrs] in {
|
|||
def XXSPLTIDP : 8RR_DForm_IMM32_XT6<32, 2, (outs vsrc:$XT),
|
||||
(ins i32imm:$IMM32),
|
||||
"xxspltidp $XT, $IMM32", IIC_VecGeneral,
|
||||
[]>;
|
||||
[(set v2f64:$XT,
|
||||
(PPCxxspltidp i32:$IMM32))]>;
|
||||
def XXSPLTI32DX :
|
||||
8RR_DForm_IMM32_XT6_IX<32, 0, (outs vsrc:$XT),
|
||||
(ins vsrc:$XTi, i1imm:$IX, i32imm:$IMM32),
|
||||
|
@ -822,3 +823,17 @@ let Predicates = [IsISA3_1] in {
|
|||
def : Pat<(v2i64 (int_ppc_vsx_xxgenpcvdm v2i64:$VRB, imm:$IMM)),
|
||||
(v2i64 (COPY_TO_REGCLASS (XXGENPCVDM $VRB, imm:$IMM), VRRC))>;
|
||||
}
|
||||
|
||||
// Immediate materialization patterns that need prefixed instructions.
let AddedComplexity = 400, Predicates = [PrefixInstrs] in {
  // A v4i32 build_vector that repeats one immediate (neither 0 nor -1) in all
  // four lanes is selected to a single XXSPLTIW.
  def : Pat<(v4i32 (build_vector i32immNonAllOneNonZero:$A,
                                 i32immNonAllOneNonZero:$A,
                                 i32immNonAllOneNonZero:$A,
                                 i32immNonAllOneNonZero:$A)),
            (v4i32 (XXSPLTIW imm:$A))>;

  // Scalar f32 and f64 immediates accepted by nzFPImmAsi32 are produced with
  // XXSPLTIDP from their single precision bit pattern and copied to VSFRC.
  foreach ScalarTy = [f32, f64] in
    def : Pat<(ScalarTy nzFPImmAsi32:$A),
              (COPY_TO_REGCLASS (XXSPLTIDP (getFPAs32BitInt fpimm:$A)),
                                VSFRC)>;
}
|
||||
|
|
|
@ -0,0 +1,111 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
|
||||
; RUN: -ppc-asm-full-reg-names -mcpu=pwr10 < %s | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
|
||||
; RUN: -ppc-asm-full-reg-names -mcpu=pwr10 < %s | FileCheck %s \
|
||||
; RUN: --check-prefix=CHECK-NOPCREL
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
|
||||
; RUN: -mattr=-pcrelative-memops -ppc-asm-full-reg-names -mcpu=pwr10 < %s | \
|
||||
; RUN: FileCheck %s --check-prefix=CHECK-NOPCREL
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
|
||||
; RUN: -ppc-asm-full-reg-names -target-abi=elfv2 -mcpu=pwr10 < %s | \
|
||||
; RUN: FileCheck %s
|
||||
|
||||
define dso_local <2 x double> @testDoubleToDoubleFail() local_unnamed_addr {
|
||||
; CHECK-LABEL: testDoubleToDoubleFail:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: plxv vs34, .LCPI0_0@PCREL(0), 1
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-NOPCREL-LABEL: testDoubleToDoubleFail:
|
||||
; CHECK-NOPCREL: # %bb.0: # %entry
|
||||
; CHECK-NOPCREL-NEXT: addis r3, r2, .LCPI0_0@toc@ha
|
||||
; CHECK-NOPCREL-NEXT: addi r3, r3, .LCPI0_0@toc@l
|
||||
; CHECK-NOPCREL-NEXT: lxvx vs34, 0, r3
|
||||
; CHECK-NOPCREL-NEXT: blr
|
||||
|
||||
entry:
|
||||
ret <2 x double> <double 3.423300e+02, double 3.423300e+02>
|
||||
}
|
||||
|
||||
define dso_local <2 x double> @testFloatDenormToDouble() local_unnamed_addr {
|
||||
; CHECK-LABEL: testFloatDenormToDouble:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: plxv vs34, .LCPI1_0@PCREL(0), 1
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-NOPCREL-LABEL: testFloatDenormToDouble:
|
||||
; CHECK-NOPCREL: # %bb.0: # %entry
|
||||
; CHECK-NOPCREL-NEXT: addis r3, r2, .LCPI1_0@toc@ha
|
||||
; CHECK-NOPCREL-NEXT: addi r3, r3, .LCPI1_0@toc@l
|
||||
; CHECK-NOPCREL-NEXT: lxvx vs34, 0, r3
|
||||
; CHECK-NOPCREL-NEXT: blr
|
||||
|
||||
entry:
|
||||
ret <2 x double> <double 0x380B38FB80000000, double 0x380B38FB80000000>
|
||||
}
|
||||
|
||||
define dso_local <2 x double> @testDoubleToDoubleNaNFail() local_unnamed_addr {
|
||||
; CHECK-LABEL: testDoubleToDoubleNaNFail:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: plxv vs34, .LCPI2_0@PCREL(0), 1
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-NOPCREL-LABEL: testDoubleToDoubleNaNFail:
|
||||
; CHECK-NOPCREL: # %bb.0: # %entry
|
||||
; CHECK-NOPCREL-NEXT: addis r3, r2, .LCPI2_0@toc@ha
|
||||
; CHECK-NOPCREL-NEXT: addi r3, r3, .LCPI2_0@toc@l
|
||||
; CHECK-NOPCREL-NEXT: lxvx vs34, 0, r3
|
||||
; CHECK-NOPCREL-NEXT: blr
|
||||
|
||||
entry:
|
||||
ret <2 x double> <double 0xFFFFFFFFFFFFFFF0, double 0xFFFFFFFFFFFFFFF0>
|
||||
}
|
||||
|
||||
define dso_local double @testDoubleNonRepresentableScalar() local_unnamed_addr {
|
||||
; CHECK-LABEL: testDoubleNonRepresentableScalar:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: plfd f1, .LCPI3_0@PCREL(0), 1
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-NOPCREL-LABEL: testDoubleNonRepresentableScalar:
|
||||
; CHECK-NOPCREL: # %bb.0: # %entry
|
||||
; CHECK-NOPCREL-NEXT: addis r3, r2, .LCPI3_0@toc@ha
|
||||
; CHECK-NOPCREL-NEXT: lfd f1, .LCPI3_0@toc@l(r3)
|
||||
; CHECK-NOPCREL-NEXT: blr
|
||||
|
||||
entry:
|
||||
ret double 3.423300e+02
|
||||
}
|
||||
|
||||
define dso_local float @testFloatDenormScalar() local_unnamed_addr {
|
||||
; CHECK-LABEL: testFloatDenormScalar:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: plfs f1, .LCPI4_0@PCREL(0), 1
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-NOPCREL-LABEL: testFloatDenormScalar:
|
||||
; CHECK-NOPCREL: # %bb.0: # %entry
|
||||
; CHECK-NOPCREL-NEXT: addis r3, r2, .LCPI4_0@toc@ha
|
||||
; CHECK-NOPCREL-NEXT: lfs f1, .LCPI4_0@toc@l(r3)
|
||||
; CHECK-NOPCREL-NEXT: blr
|
||||
|
||||
entry:
|
||||
ret float 0x380B38FB80000000
|
||||
}
|
||||
|
||||
define dso_local double @testFloatDenormToDoubleScalar() local_unnamed_addr {
|
||||
; CHECK-LABEL: testFloatDenormToDoubleScalar:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: plfs f1, .LCPI5_0@PCREL(0), 1
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-NOPCREL-LABEL: testFloatDenormToDoubleScalar:
|
||||
; CHECK-NOPCREL: # %bb.0: # %entry
|
||||
; CHECK-NOPCREL-NEXT: addis r3, r2, .LCPI5_0@toc@ha
|
||||
; CHECK-NOPCREL-NEXT: lfs f1, .LCPI5_0@toc@l(r3)
|
||||
; CHECK-NOPCREL-NEXT: blr
|
||||
|
||||
entry:
|
||||
ret double 0x380B38FB80000000
|
||||
}
|
|
@ -0,0 +1,288 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
|
||||
; RUN: -ppc-asm-full-reg-names -mcpu=pwr10 < %s | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
|
||||
; RUN: -ppc-asm-full-reg-names -mcpu=pwr10 < %s | FileCheck %s
|
||||
|
||||
define dso_local <4 x i32> @testZero() local_unnamed_addr {
|
||||
; CHECK-LABEL: testZero:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xxlxor vs34, vs34, vs34
|
||||
; CHECK-NEXT: blr
|
||||
|
||||
entry:
|
||||
ret <4 x i32> zeroinitializer
|
||||
}
|
||||
|
||||
define dso_local <4 x float> @testZeroF() local_unnamed_addr {
|
||||
; CHECK-LABEL: testZeroF:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xxlxor vs34, vs34, vs34
|
||||
; CHECK-NEXT: blr
|
||||
|
||||
entry:
|
||||
ret <4 x float> zeroinitializer
|
||||
}
|
||||
|
||||
define dso_local <4 x i32> @testAllOneS() local_unnamed_addr {
|
||||
; CHECK-LABEL: testAllOneS:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xxleqv vs34, vs34, vs34
|
||||
; CHECK-NEXT: blr
|
||||
|
||||
entry:
|
||||
ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
}
|
||||
|
||||
define dso_local <4 x i32> @test5Bit() local_unnamed_addr {
|
||||
; CHECK-LABEL: test5Bit:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vspltisw v2, 7
|
||||
; CHECK-NEXT: blr
|
||||
|
||||
entry:
|
||||
ret <4 x i32> <i32 7, i32 7, i32 7, i32 7>
|
||||
}
|
||||
|
||||
define dso_local <16 x i8> @test1ByteChar() local_unnamed_addr {
|
||||
; CHECK-LABEL: test1ByteChar:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xxspltib vs34, 7
|
||||
; CHECK-NEXT: blr
|
||||
|
||||
entry:
|
||||
ret <16 x i8> <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
|
||||
}
|
||||
|
||||
define dso_local <4 x i32> @test1ByteSplatInt() local_unnamed_addr {
|
||||
; Here each word is 0xABABABAB, so the splat can be done as a byte splat of
|
||||
; 0xAB (171) using xxspltib, avoiding the use of xxspltiw.
|
||||
; CHECK-LABEL: test1ByteSplatInt:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xxspltib vs34, 171
|
||||
; CHECK-NEXT: blr
|
||||
|
||||
entry:
|
||||
ret <4 x i32> <i32 -1414812757, i32 -1414812757, i32 -1414812757, i32 -1414812757>
|
||||
}
|
||||
|
||||
define dso_local <4 x i32> @test5Bit2Ins() local_unnamed_addr {
|
||||
; Splats within the range [-32,31] can be done using two vsplti[bhw]
|
||||
; instructions, but we prefer the xxspltiw instruction to them.
|
||||
; CHECK-LABEL: test5Bit2Ins:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xxspltiw vs34, 16
|
||||
; CHECK-NEXT: blr
|
||||
|
||||
entry:
|
||||
ret <4 x i32> <i32 16, i32 16, i32 16, i32 16>
|
||||
}
|
||||
|
||||
define dso_local <4 x float> @testFloatNegZero() local_unnamed_addr {
|
||||
; 0.0f is not the same as -0.0f. We try to splat -0.0f
|
||||
; CHECK-LABEL: testFloatNegZero:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xxspltiw vs34, -2147483648
|
||||
; CHECK-NEXT: blr
|
||||
|
||||
entry:
|
||||
ret <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>
|
||||
}
|
||||
|
||||
define dso_local <4 x float> @testFloat() local_unnamed_addr {
|
||||
; CHECK-LABEL: testFloat:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xxspltiw vs34, 1135323709
|
||||
; CHECK-NEXT: blr
|
||||
|
||||
entry:
|
||||
ret <4 x float> <float 0x40757547A0000000, float 0x40757547A0000000, float 0x40757547A0000000, float 0x40757547A0000000>
|
||||
}
|
||||
|
||||
define dso_local <4 x float> @testIntToFloat() local_unnamed_addr {
|
||||
; CHECK-LABEL: testIntToFloat:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xxspltiw vs34, 1135312896
|
||||
; CHECK-NEXT: blr
|
||||
|
||||
entry:
|
||||
ret <4 x float> <float 3.430000e+02, float 3.430000e+02, float 3.430000e+02, float 3.430000e+02>
|
||||
}
|
||||
|
||||
define dso_local <4 x i32> @testUndefInt() local_unnamed_addr {
|
||||
; CHECK-LABEL: testUndefInt:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xxspltiw vs34, 18
|
||||
; CHECK-NEXT: blr
|
||||
|
||||
entry:
|
||||
ret <4 x i32> <i32 18, i32 undef, i32 undef, i32 18>
|
||||
}
|
||||
|
||||
define dso_local <4 x float> @testUndefIntToFloat() local_unnamed_addr {
|
||||
; CHECK-LABEL: testUndefIntToFloat:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xxspltiw vs34, 1135312896
|
||||
; CHECK-NEXT: blr
|
||||
|
||||
entry:
|
||||
ret <4 x float> <float 3.430000e+02, float undef, float undef, float 3.430000e+02>
|
||||
}
|
||||
|
||||
define dso_local <2 x i64> @testPseudo8Byte() local_unnamed_addr {
|
||||
; CHECK-LABEL: testPseudo8Byte:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xxspltiw vs34, -1430532899
|
||||
; CHECK-NEXT: blr
|
||||
|
||||
entry:
|
||||
ret <2 x i64> <i64 -6144092014192636707, i64 -6144092014192636707>
|
||||
}
|
||||
|
||||
define dso_local <8 x i16> @test2Byte() local_unnamed_addr {
|
||||
; CHECK-LABEL: test2Byte:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xxspltiw vs34, 1179666
|
||||
; CHECK-NEXT: blr
|
||||
|
||||
entry:
|
||||
ret <8 x i16> <i16 18, i16 18, i16 18, i16 18, i16 18, i16 18, i16 18, i16 18>
|
||||
}
|
||||
|
||||
define dso_local <8 x i16> @test2ByteUndef() local_unnamed_addr {
|
||||
; CHECK-LABEL: test2ByteUndef:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xxspltiw vs34, 1179666
|
||||
; CHECK-NEXT: blr
|
||||
|
||||
entry:
|
||||
ret <8 x i16> <i16 18, i16 undef, i16 18, i16 18, i16 18, i16 undef, i16 18, i16 18>
|
||||
}
|
||||
|
||||
define dso_local <2 x double> @testFloatToDouble() local_unnamed_addr {
|
||||
; CHECK-LABEL: testFloatToDouble:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xxspltidp vs34, 1135290941
|
||||
; CHECK-NEXT: blr
|
||||
|
||||
entry:
|
||||
ret <2 x double> <double 0x40756547A0000000, double 0x40756547A0000000>
|
||||
}
|
||||
|
||||
define dso_local <2 x double> @testDoubleLower4ByteZero() local_unnamed_addr {
|
||||
; The expanded double will have 0 in the last 32 bits. Imprecise handling of
|
||||
; the value returned by APInt::getZExtValue, such as saving it into an
|
||||
; unsigned instead of a uint64_t, may cause this test to fail.
|
||||
; CHECK-LABEL: testDoubleLower4ByteZero:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xxspltidp vs34, 1093664768
|
||||
; CHECK-NEXT: blr
|
||||
|
||||
entry:
|
||||
ret <2 x double> <double 1.100000e+01, double 1.100000e+01>
|
||||
}
|
||||
|
||||
define dso_local <2 x double> @testDoubleToDoubleZero() local_unnamed_addr {
|
||||
; Should be using canonicalized form to splat zero and use shorter instructions
|
||||
; than xxspltidp.
|
||||
; CHECK-LABEL: testDoubleToDoubleZero:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xxlxor vs34, vs34, vs34
|
||||
; CHECK-NEXT: blr
|
||||
|
||||
entry:
|
||||
ret <2 x double> zeroinitializer
|
||||
}
|
||||
|
||||
define dso_local <2 x double> @testDoubleToDoubleNegZero() local_unnamed_addr {
|
||||
; CHECK-LABEL: testDoubleToDoubleNegZero:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xxspltidp vs34, -2147483648
|
||||
; CHECK-NEXT: blr
|
||||
|
||||
entry:
|
||||
ret <2 x double> <double -0.000000e+00, double -0.000000e+00>
|
||||
}
|
||||
|
||||
define dso_local <2 x double> @testDoubleToDoubleNaN() local_unnamed_addr {
|
||||
; CHECK-LABEL: testDoubleToDoubleNaN:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xxspltidp vs34, -16
|
||||
; CHECK-NEXT: blr
|
||||
|
||||
entry:
|
||||
ret <2 x double> <double 0xFFFFFFFE00000000, double 0xFFFFFFFE00000000>
|
||||
}
|
||||
|
||||
define dso_local <2 x double> @testDoubleToDoubleInfinity() local_unnamed_addr {
|
||||
; CHECK-LABEL: testDoubleToDoubleInfinity:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xxspltidp vs34, 2139095040
|
||||
; CHECK-NEXT: blr
|
||||
|
||||
entry:
|
||||
ret <2 x double> <double 0x7FF0000000000000, double 0x7FF0000000000000>
|
||||
}
|
||||
|
||||
define dso_local <2 x double> @testFloatToDoubleNaN() local_unnamed_addr {
|
||||
; CHECK-LABEL: testFloatToDoubleNaN:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xxspltidp vs34, -1
|
||||
; CHECK-NEXT: blr
|
||||
|
||||
entry:
|
||||
ret <2 x double> <double 0xFFFFFFFFE0000000, double 0xFFFFFFFFE0000000>
|
||||
}
|
||||
|
||||
define dso_local <2 x double> @testFloatToDoubleInfinity() local_unnamed_addr {
|
||||
; CHECK-LABEL: testFloatToDoubleInfinity:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xxspltidp vs34, 2139095040
|
||||
; CHECK-NEXT: blr
|
||||
|
||||
entry:
|
||||
ret <2 x double> <double 0x7FF0000000000000, double 0x7FF0000000000000>
|
||||
}
|
||||
|
||||
define dso_local float @testFloatScalar() local_unnamed_addr {
|
||||
; CHECK-LABEL: testFloatScalar:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xxspltidp vs1, 1135290941
|
||||
; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1
|
||||
; CHECK-NEXT: blr
|
||||
|
||||
entry:
|
||||
ret float 0x40756547A0000000
|
||||
}
|
||||
|
||||
define dso_local float @testFloatZeroScalar() local_unnamed_addr {
|
||||
; CHECK-LABEL: testFloatZeroScalar:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xxlxor f1, f1, f1
|
||||
; CHECK-NEXT: blr
|
||||
|
||||
entry:
|
||||
ret float 0.000000e+00
|
||||
}
|
||||
|
||||
define dso_local double @testDoubleRepresentableScalar() local_unnamed_addr {
|
||||
; CHECK-LABEL: testDoubleRepresentableScalar:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xxspltidp vs1, 1135290941
|
||||
; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1
|
||||
; CHECK-NEXT: blr
|
||||
|
||||
entry:
|
||||
ret double 0x40756547A0000000
|
||||
}
|
||||
|
||||
define dso_local double @testDoubleZeroScalar() local_unnamed_addr {
|
||||
; CHECK-LABEL: testDoubleZeroScalar:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xxlxor f1, f1, f1
|
||||
; CHECK-NEXT: blr
|
||||
|
||||
entry:
|
||||
ret double 0.000000e+00
|
||||
}
|
Loading…
Reference in New Issue