[PowerPC] Exploit xxsplti32dx (constant materialization) for scalars

This patch exploits the xxsplti32dx instruction available on Power10 in place of constant pool loads where xxspltidp would not be able to, usually because the immediate cannot fit into 32 bits. Differential Revision: https://reviews.llvm.org/D95458
2021-03-24 15:57:27 -04:00 · 2021-03-24 15:57:27 -04:00 · e29bb074c6
parent e122877f10
commit e29bb074c6
10 changed files with 159 additions and 61 deletions
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@ -8820,6 +8820,18 @@ bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
  return Success;
 }

+// Nondestructive check for convertTonNonDenormSingle.
+bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
+  // Only convert if it loses info, since XXSPLTIDP should
+  // handle the other case.
+  APFloat APFloatToConvert = ArgAPFloat;
+  bool LosesInfo = true;
+  APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
+                           &LosesInfo);
+
+  return (!LosesInfo && !APFloatToConvert.isDenormal());
+}
+
 // If this is a case we can't handle, return null and let the default
 // expansion code take care of it.  If we CAN select this case, and if it
 // selects to a single instruction, return Op.  Otherwise, if we can codegen
@ -16115,10 +16127,8 @@ bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
  case MVT::f32:
  case MVT::f64:
    if (Subtarget.hasPrefixInstrs()) {
-      // With prefixed instructions, we can materialize anything that can be
-      // represented with a 32-bit immediate, not just positive zero.
-      APFloat APFloatOfImm = Imm;
-      return convertToNonDenormSingle(APFloatOfImm);
+      // we can materialize all immediatess via XXSPLTI32DX and XXSPLTIDP.
+      return true;
    }
    LLVM_FALLTHROUGH;
  case MVT::ppcf128:
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@ -1332,6 +1332,7 @@ namespace llvm {

  bool convertToNonDenormSingle(APInt &ArgAPInt);
  bool convertToNonDenormSingle(APFloat &ArgAPFloat);
+  bool checkConvertToNonDenormSingle(APFloat &ArgAPFloat);

 } // end namespace llvm

--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@ -1108,6 +1108,7 @@ bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
  case PPC::XXLXORspz:
  case PPC::XXLXORdpz:
  case PPC::XXLEQVOnes:
+  case PPC::XXSPLTI32DX:
  case PPC::V_SET0B:
  case PPC::V_SET0H:
  case PPC::V_SET0:
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@ -399,6 +399,30 @@ def getFPAs32BitInt : SDNodeXForm<fpimm, [{
                                   SDLoc(N), MVT::i32);
 }]>;

+// Check if the value can be converted to be single precision immediate, which
+// can be exploited by XXSPLTIDP. Ensure that it cannot be converted to single
+// precision before exploiting with XXSPLTI32DX.
+def nzFPImmAsi64 : PatLeaf<(fpimm), [{
+  APFloat APFloatOfN = N->getValueAPF();
+  return !N->isExactlyValue(+0.0) && !checkConvertToNonDenormSingle(APFloatOfN);
+}]>;
+
+// Get the Hi bits of a 64 bit immediate.
+def getFPAs64BitIntHi : SDNodeXForm<fpimm, [{
+  APFloat APFloatOfN = N->getValueAPF();
+  uint32_t Hi = (uint32_t)((APFloatOfN.bitcastToAPInt().getZExtValue() &
+                            0xFFFFFFFF00000000LL) >> 32);
+  return CurDAG->getTargetConstant(Hi, SDLoc(N), MVT::i32);
+}]>;
+
+// Get the Lo bits of a 64 bit immediate.
+def getFPAs64BitIntLo : SDNodeXForm<fpimm, [{
+  APFloat APFloatOfN = N->getValueAPF();
+  uint32_t Lo = (uint32_t)(APFloatOfN.bitcastToAPInt().getZExtValue() &
+                           0xFFFFFFFF);
+  return CurDAG->getTargetConstant(Lo, SDLoc(N), MVT::i32);
+}]>;
+
 def imm34 : PatLeaf<(imm), [{
  return isInt<34>(N->getSExtValue());
 }]>;
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@ -1867,14 +1867,6 @@ let Predicates = [PrefixInstrs] in {
                                      "xxspltidp $XT, $IMM32", IIC_VecGeneral,
                                      [(set v2f64:$XT,
                                            (PPCxxspltidp i32:$IMM32))]>;
-  def XXSPLTI32DX :
-      8RR_DForm_IMM32_XT6_IX<32, 0, (outs vsrc:$XT),
-                             (ins vsrc:$XTi, u1imm:$IX, i32imm:$IMM32),
-                             "xxsplti32dx $XT, $IX, $IMM32", IIC_VecGeneral,
-                             [(set v2i64:$XT,
-                                   (PPCxxsplti32dx v2i64:$XTi, i32:$IX,
-                                                   i32:$IMM32))]>,
-                             RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
  def XXPERMX :
    8RR_XX4Form_IMM3_XTABC6<34, 0, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB,
                            vsrc:$XC, u3imm:$UIM),
@ -1898,6 +1890,19 @@ let Predicates = [PrefixInstrs] in {
                       IIC_VecGeneral, []>;
 }

+// XXSPLI32DX needs extra flags to make sure the compiler does not attempt
+// to spill part of the instruction when the values are similar.
+let isReMaterializable = 1, isMoveImm = 1, Predicates = [PrefixInstrs] in {
+  def XXSPLTI32DX :
+      8RR_DForm_IMM32_XT6_IX<32, 0, (outs vsrc:$XT),
+                             (ins vsrc:$XTi, u1imm:$IX, i32imm:$IMM32),
+                             "xxsplti32dx $XT, $IX, $IMM32", IIC_VecGeneral,
+                             [(set v2i64:$XT,
+                                   (PPCxxsplti32dx v2i64:$XTi, i32:$IX,
+                                                   i32:$IMM32))]>,
+                             RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+}
+
 let Predicates = [IsISA3_1] in {
  def SETBC : XForm_XT5_BI5<31, 384, (outs gprc:$RT), (ins crbitrc:$BI),
                            "setbc $RT, $BI", IIC_IntCompare, []>;
@ -2623,6 +2628,19 @@ let AddedComplexity = 400, Predicates = [PrefixInstrs] in {
           (COPY_TO_REGCLASS (XXSPLTIDP (getFPAs32BitInt fpimm:$A)),
                             VSFRC)>;

+// To replace constant pool with XXSPLTI32DX for scalars.
+def : Pat<(f32 nzFPImmAsi64:$A),
+          (COPY_TO_REGCLASS (XXSPLTI32DX (XXSPLTI32DX(IMPLICIT_DEF), 0,
+                                        (getFPAs64BitIntHi $A)),
+                                        1, (getFPAs64BitIntLo $A)),
+                            VSRC)>;
+
+def : Pat<(f64 nzFPImmAsi64:$A),
+          (COPY_TO_REGCLASS (XXSPLTI32DX (XXSPLTI32DX (IMPLICIT_DEF), 0,
+                                        (getFPAs64BitIntHi $A)),
+                                        1, (getFPAs64BitIntLo $A)),
+                            VSRC)>;
+
  // Anonymous patterns for XXEVAL
  // AND
  // and(A, B, C)
--- a/llvm/test/CodeGen/PowerPC/constant-pool.ll
+++ b/llvm/test/CodeGen/PowerPC/constant-pool.ll
@ -9,7 +9,9 @@
 define float @FloatConstantPool() {
 ; CHECK-LABEL: FloatConstantPool:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    plfs f1, .LCPI0_0@PCREL(0), 1
+; CHECK-NEXT:    xxsplti32dx vs1, 0, 0
+; CHECK-NEXT:    xxsplti32dx vs1, 1, 8388577
+; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: FloatConstantPool:
@ -24,7 +26,9 @@ entry:
 define double @DoubleConstantPool() {
 ; CHECK-LABEL: DoubleConstantPool:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    plfd f1, .LCPI1_0@PCREL(0), 1
+; CHECK-NEXT:    xxsplti32dx vs1, 0, 1048574
+; CHECK-NEXT:    xxsplti32dx vs1, 1, 780229072
+; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: DoubleConstantPool:
@ -39,8 +43,12 @@ entry:
 define ppc_fp128 @LongDoubleConstantPool() {
 ; CHECK-LABEL: LongDoubleConstantPool:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    plfd f1, .LCPI2_0@PCREL(0), 1
-; CHECK-NEXT:    plfd f2, .LCPI2_1@PCREL(0), 1
+; CHECK-NEXT:    xxsplti32dx vs1, 0, 56623104
+; CHECK-NEXT:    xxsplti32dx vs2, 0, -2146625897
+; CHECK-NEXT:    xxsplti32dx vs1, 1, -609716532
+; CHECK-NEXT:    xxsplti32dx vs2, 1, 1339675259
+; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; CHECK-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: LongDoubleConstantPool:
@ -185,9 +193,11 @@ entry:
 define double @two_constants(double %a) {
 ; CHECK-LABEL: two_constants:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    plfd f0, .LCPI11_0@PCREL(0), 1
+; CHECK-NEXT:    xxsplti32dx vs0, 0, 1074446467
+; CHECK-NEXT:    xxsplti32dx vs0, 1, 309237645
 ; CHECK-NEXT:    xsadddp f0, f1, f0
-; CHECK-NEXT:    plfd f1, .LCPI11_1@PCREL(0), 1
+; CHECK-NEXT:    xxsplti32dx vs1, 0, 1073922179
+; CHECK-NEXT:    xxsplti32dx vs1, 1, 309237645
 ; CHECK-NEXT:    xsadddp f1, f0, f1
 ; CHECK-NEXT:    blr
 ;
@ -212,11 +222,15 @@ define double @two_constants_two_bb(i32 %m, double %a) {
 ; CHECK-NEXT:    cmplwi r3, 0
 ; CHECK-NEXT:    beq cr0, .LBB12_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    plfd f1, .LCPI12_0@PCREL(0), 1
+; CHECK-NEXT:    xxsplti32dx vs1, 0, 1074935889
+; CHECK-NEXT:    xxsplti32dx vs1, 1, -343597384
+; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    blr
 ; CHECK-NEXT:  .LBB12_2: # %if.end
-; CHECK-NEXT:    plfd f0, .LCPI12_1@PCREL(0), 1
+; CHECK-NEXT:    xxsplti32dx vs0, 0, 1076085391
+; CHECK-NEXT:    xxsplti32dx vs0, 1, 1546188227
 ; CHECK-NEXT:    xsadddp f1, f1, f0
+; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: two_constants_two_bb:
@ -248,11 +262,14 @@ return:
 define double @three_constants_f64(double %a, double %c) {
 ; CHECK-LABEL: three_constants_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    plfd f0, .LCPI13_0@PCREL(0), 1
+; CHECK-NEXT:    xxsplti32dx vs0, 0, 1074446467
+; CHECK-NEXT:    xxsplti32dx vs0, 1, 309237645
 ; CHECK-NEXT:    xsadddp f0, f1, f0
-; CHECK-NEXT:    plfd f1, .LCPI13_1@PCREL(0), 1
+; CHECK-NEXT:    xxsplti32dx vs1, 0, 1073922179
+; CHECK-NEXT:    xxsplti32dx vs1, 1, 309237645
 ; CHECK-NEXT:    xsadddp f0, f0, f1
-; CHECK-NEXT:    plfd f1, .LCPI13_2@PCREL(0), 1
+; CHECK-NEXT:    xxsplti32dx vs1, 0, 1073948393
+; CHECK-NEXT:    xxsplti32dx vs1, 1, 2027224564
 ; CHECK-NEXT:    xsadddp f1, f0, f1
 ; CHECK-NEXT:    blr
 ;
@ -340,21 +357,26 @@ entry:

 define ppc_fp128 @three_constants_ppcf128(ppc_fp128 %a, ppc_fp128 %c) {
 ; CHECK-LABEL: three_constants_ppcf128:
-; CHECK:         .localentry three_constants_ppcf128, 1
-; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -32(r1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    plfd f3, .LCPI16_0@PCREL(0), 1
-; CHECK-NEXT:    xxlxor f4, f4, f4
+; CHECK-DAG:     xxlxor f4, f4, f4
+; CHECK-DAG:     xxsplti32dx vs3, 0, 1074935889
+; CHECK-NEXT:    xxsplti32dx vs3, 1, -343597384
+; CHECK-NEXT:    # kill: def $f3 killed $f3 killed $vsl3
 ; CHECK-NEXT:    bl __gcc_qadd@notoc
-; CHECK-NEXT:    plfd f3, .LCPI16_1@PCREL(0), 1
-; CHECK-NEXT:    xxlxor f4, f4, f4
+; CHECK-DAG:     xxlxor f4, f4, f4
+; CHECK-DAG:     xxsplti32dx vs3, 0, 1074935889
+; CHECK-NEXT:    xxsplti32dx vs3, 1, -1719329096
+; CHECK-NEXT:    # kill: def $f3 killed $f3 killed $vsl3
 ; CHECK-NEXT:    bl __gcc_qadd@notoc
-; CHECK-NEXT:    plfd f3, .LCPI16_2@PCREL(0), 1
-; CHECK-NEXT:    xxlxor f4, f4, f4
+; CHECK-DAG:     xxlxor f4, f4, f4
+; CHECK-DAG:     xxsplti32dx vs3, 0, 1074935889
+; CHECK-NEXT:    xxsplti32dx vs3, 1, 8724152
+; CHECK-NEXT:    # kill: def $f3 killed $f3 killed $vsl3
 ; CHECK-NEXT:    bl __gcc_qadd@notoc
 ; CHECK-NEXT:    addi r1, r1, 32
 ; CHECK-NEXT:    ld r0, 16(r1)
--- a/llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
@ -122,19 +122,23 @@ entry:
 define dso_local double @testDoubleNonRepresentableScalar() local_unnamed_addr {
 ; CHECK-LE-LABEL: testDoubleNonRepresentableScalar:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    plfd f1, .LCPI3_0@PCREL(0), 1
+; CHECK-LE-NEXT:    xxsplti32dx vs1, 0, 1081435463
+; CHECK-LE-NEXT:    xxsplti32dx vs1, 1, -1374389535
+; CHECK-LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-LE-NEXT:    blr
 ;
 ; CHECK-NOPCREL-BE-LABEL: testDoubleNonRepresentableScalar:
 ; CHECK-NOPCREL-BE:       # %bb.0: # %entry
-; CHECK-NOPCREL-BE-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
-; CHECK-NOPCREL-BE-NEXT:    lfd f1, .LCPI3_0@toc@l(r3)
+; CHECK-NOPCREL-BE-NEXT:    xxsplti32dx vs1, 0, 1081435463
+; CHECK-NOPCREL-BE-NEXT:    xxsplti32dx vs1, 1, -1374389535
+; CHECK-NOPCREL-BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NOPCREL-BE-NEXT:    blr
 ;
 ; CHECK-NOPCREL-LE-LABEL: testDoubleNonRepresentableScalar:
 ; CHECK-NOPCREL-LE:       # %bb.0: # %entry
-; CHECK-NOPCREL-LE-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
-; CHECK-NOPCREL-LE-NEXT:    lfd f1, .LCPI3_0@toc@l(r3)
+; CHECK-NOPCREL-LE-NEXT:    xxsplti32dx vs1, 0, 1081435463
+; CHECK-NOPCREL-LE-NEXT:    xxsplti32dx vs1, 1, -1374389535
+; CHECK-NOPCREL-LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NOPCREL-LE-NEXT:    blr
 ;
 ; CHECK-NOPREFIX-LABEL: testDoubleNonRepresentableScalar:
@ -145,7 +149,9 @@ define dso_local double @testDoubleNonRepresentableScalar() local_unnamed_addr {
 ;
 ; CHECK-BE-LABEL: testDoubleNonRepresentableScalar:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    plfd f1, .LCPI3_0@PCREL(0), 1
+; CHECK-BE-NEXT:    xxsplti32dx vs1, 0, 1081435463
+; CHECK-BE-NEXT:    xxsplti32dx vs1, 1, -1374389535
+; CHECK-BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-BE-NEXT:    blr
 entry:
  ret double 3.423300e+02
@ -154,19 +160,23 @@ entry:
 define dso_local float @testFloatDenormScalar() local_unnamed_addr {
 ; CHECK-LE-LABEL: testFloatDenormScalar:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    plfs f1, .LCPI4_0@PCREL(0), 1
+; CHECK-LE-NEXT:    xxsplti32dx vs1, 0, 0
+; CHECK-LE-NEXT:    xxsplti32dx vs1, 1, 7136238
+; CHECK-LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-LE-NEXT:    blr
 ;
 ; CHECK-NOPCREL-BE-LABEL: testFloatDenormScalar:
 ; CHECK-NOPCREL-BE:       # %bb.0: # %entry
-; CHECK-NOPCREL-BE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
-; CHECK-NOPCREL-BE-NEXT:    lfs f1, .LCPI4_0@toc@l(r3)
+; CHECK-NOPCREL-BE-NEXT:    xxsplti32dx vs1, 0, 0
+; CHECK-NOPCREL-BE-NEXT:    xxsplti32dx vs1, 1, 7136238
+; CHECK-NOPCREL-BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NOPCREL-BE-NEXT:    blr
 ;
 ; CHECK-NOPCREL-LE-LABEL: testFloatDenormScalar:
 ; CHECK-NOPCREL-LE:       # %bb.0: # %entry
-; CHECK-NOPCREL-LE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
-; CHECK-NOPCREL-LE-NEXT:    lfs f1, .LCPI4_0@toc@l(r3)
+; CHECK-NOPCREL-LE-NEXT:    xxsplti32dx vs1, 0, 0
+; CHECK-NOPCREL-LE-NEXT:    xxsplti32dx vs1, 1, 7136238
+; CHECK-NOPCREL-LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NOPCREL-LE-NEXT:    blr
 ;
 ; CHECK-NOPREFIX-LABEL: testFloatDenormScalar:
@ -177,7 +187,9 @@ define dso_local float @testFloatDenormScalar() local_unnamed_addr {
 ;
 ; CHECK-BE-LABEL: testFloatDenormScalar:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    plfs f1, .LCPI4_0@PCREL(0), 1
+; CHECK-BE-NEXT:    xxsplti32dx vs1, 0, 0
+; CHECK-BE-NEXT:    xxsplti32dx vs1, 1, 7136238
+; CHECK-BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-BE-NEXT:    blr
 entry:
  ret float 0x380B38FB80000000
@ -186,19 +198,23 @@ entry:
 define dso_local double @testFloatDenormToDoubleScalar() local_unnamed_addr {
 ; CHECK-LE-LABEL: testFloatDenormToDoubleScalar:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    plfs f1, .LCPI5_0@PCREL(0), 1
+; CHECK-LE-NEXT:    xxsplti32dx vs1, 0, 940259579
+; CHECK-LE-NEXT:    xxsplti32dx vs1, 1, -2147483648
+; CHECK-LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-LE-NEXT:    blr
 ;
 ; CHECK-NOPCREL-BE-LABEL: testFloatDenormToDoubleScalar:
 ; CHECK-NOPCREL-BE:       # %bb.0: # %entry
-; CHECK-NOPCREL-BE-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
-; CHECK-NOPCREL-BE-NEXT:    lfs f1, .LCPI5_0@toc@l(r3)
+; CHECK-NOPCREL-BE-NEXT:    xxsplti32dx vs1, 0, 940259579
+; CHECK-NOPCREL-BE-NEXT:    xxsplti32dx vs1, 1, -2147483648
+; CHECK-NOPCREL-BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NOPCREL-BE-NEXT:    blr
 ;
 ; CHECK-NOPCREL-LE-LABEL: testFloatDenormToDoubleScalar:
 ; CHECK-NOPCREL-LE:       # %bb.0: # %entry
-; CHECK-NOPCREL-LE-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
-; CHECK-NOPCREL-LE-NEXT:    lfs f1, .LCPI5_0@toc@l(r3)
+; CHECK-NOPCREL-LE-NEXT:    xxsplti32dx vs1, 0, 940259579
+; CHECK-NOPCREL-LE-NEXT:    xxsplti32dx vs1, 1, -2147483648
+; CHECK-NOPCREL-LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NOPCREL-LE-NEXT:    blr
 ;
 ; CHECK-NOPREFIX-LABEL: testFloatDenormToDoubleScalar:
@ -209,7 +225,9 @@ define dso_local double @testFloatDenormToDoubleScalar() local_unnamed_addr {
 ;
 ; CHECK-BE-LABEL: testFloatDenormToDoubleScalar:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    plfs f1, .LCPI5_0@PCREL(0), 1
+; CHECK-BE-NEXT:    xxsplti32dx vs1, 0, 940259579
+; CHECK-BE-NEXT:    xxsplti32dx vs1, 1, -2147483648
+; CHECK-BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-BE-NEXT:    blr
 entry:
  ret double 0x380B38FB80000000
--- a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
+++ b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
@ -173,7 +173,9 @@ define dso_local double @UsesX2AsConstPoolTOC() local_unnamed_addr {
 ; CHECK-LARGE:     add r2, r2, r12
 ; CHECK-S-NOT:       .localentry
 ; CHECK-ALL:       # %bb.0: # %entry
-; CHECK-S-NEXT:    plfd f1, .LCPI7_0@PCREL(0), 1
+; CHECK-S-NEXT:    xxsplti32dx vs1, 0, 1078011044
+; CHECK-S-NEXT:    xxsplti32dx vs1, 1, -337824948
+; CHECK-S-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-S-NEXT:    blr
 entry:
  ret double 0x404124A4EBDD334C
--- a/llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
+++ b/llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
@ -35,6 +35,9 @@
@FuncPtrOut = external local_unnamed_addr global void (...)*, align 8

 define dso_local void @ReadWrite8() local_unnamed_addr #0 {
+; In this test the stb r3, 0(r4) cannot be optimized because it
+; uses the register r3 and that register is defined by lbz r3, 0(r3)
+; which is defined between the pld and the stb.
 ; CHECK-LABEL: ReadWrite8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pld r3, input8@got@pcrel(0), 1
@ -44,9 +47,6 @@ define dso_local void @ReadWrite8() local_unnamed_addr #0 {
 ; CHECK-NEXT:    lbz r3, 0(r3)
 ; CHECK-NEXT:    stb r3, 0(r4)
 ; CHECK-NEXT:    blr
-; In this test the stb r3, 0(r4) cannot be optimized because it
-; uses the register r3 and that register is defined by lbz r3, 0(r3)
-; which is defined between the pld and the stb.
 entry:
  %0 = load i8, i8* @input8, align 1
  store i8 %0, i8* @output8, align 1
@ -54,6 +54,9 @@ entry:
 }

 define dso_local void @ReadWrite16() local_unnamed_addr #0 {
+; In this test the sth r3, 0(r4) cannot be optimized because it
+; uses the register r3 and that register is defined by lhz r3, 0(r3)
+; which is defined between the pld and the sth.
 ; CHECK-LABEL: ReadWrite16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pld r3, input16@got@pcrel(0), 1
@ -63,9 +66,6 @@ define dso_local void @ReadWrite16() local_unnamed_addr #0 {
 ; CHECK-NEXT:    lhz r3, 0(r3)
 ; CHECK-NEXT:    sth r3, 0(r4)
 ; CHECK-NEXT:    blr
-; In this test the sth r3, 0(r4) cannot be optimized because it
-; uses the register r3 and that register is defined by lhz r3, 0(r3)
-; which is defined between the pld and the sth.
 entry:
  %0 = load i16, i16* @input16, align 2
  store i16 %0, i16* @output16, align 2
@ -144,7 +144,8 @@ define dso_local void @ReadWritef64() local_unnamed_addr #0 {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pld r3, inputf64@got@pcrel(0), 1
 ; CHECK-NEXT:  .Lpcrel5:
-; CHECK-NEXT:    plfd f1, .LCPI6_0@PCREL(0), 1
+; CHECK-NEXT:    xxsplti32dx vs1, 0, 1075524403
+; CHECK-NEXT:    xxsplti32dx vs1, 1, 858993459
 ; CHECK-NEXT:    .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8)
 ; CHECK-NEXT:    lfd f0, 0(r3)
 ; CHECK-NEXT:    pld r3, outputf64@got@pcrel(0), 1
--- a/llvm/test/CodeGen/PowerPC/pcrel.ll
+++ b/llvm/test/CodeGen/PowerPC/pcrel.ll
@ -8,13 +8,14 @@

 ; Constant Pool Index.
 ; CHECK-S-LABEL: ConstPool
-; CHECK-S:       plfd f1, .LCPI0_0@PCREL(0), 1
+; CHECK-S:       xxsplti32dx vs1, 0, 1081002676
+; CHECK-S-NEXT:       xxsplti32dx vs1, 1, 962072674
 ; CHECK-S:       blr

 ; CHECK-O-LABEL: ConstPool
-; CHECK-O:       plfd 1, 0(0), 1
-; CHECK-O-NEXT:  R_PPC64_PCREL34  .rodata.cst8
-; CHECK-O:       blr
+; CHECK-O:       xxsplti32dx 1, 0, 1081002676
+; CHECK-O-NEXT:  xxsplti32dx 1, 1, 962072674
+; CHECK-O-NEXT:  blr
 define dso_local double @ConstPool() local_unnamed_addr {
  entry:
    ret double 0x406ECAB439581062