Add the PPC lfiwax instruction

This instruction is available on modern PPC64 CPUs, and is now used to improve the SINT_TO_FP lowering (by eliminating the need for the separate sign extension instruction and decreasing the amount of needed stack space). llvm-svn: 178446
2013-03-31 10:12:51 +00:00 · 2013-03-31 10:12:51 +00:00 · beb296bea1
parent e53429a13e
commit beb296bea1
8 changed files with 82 additions and 22 deletions
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@ -59,6 +59,8 @@ def FeatureFSqrt     : SubtargetFeature<"fsqrt","HasFSQRT", "true",
                                        "Enable the fsqrt instruction">;
 def FeatureSTFIWX    : SubtargetFeature<"stfiwx","HasSTFIWX", "true",
                                        "Enable the stfiwx instruction">;
+def FeatureLFIWAX    : SubtargetFeature<"lfiwax","HasLFIWAX", "true",
+                                        "Enable the lfiwax instruction">;
 def FeatureFPRND     : SubtargetFeature<"fprnd", "HasFPRND", "true",
                                        "Enable the fri[mnpz] instructions">;
 def FeatureISEL      : SubtargetFeature<"isel","HasISEL", "true",
@ -80,7 +82,6 @@ def FeatureQPX       : SubtargetFeature<"qpx","HasQPX", "true",
 // FLT_CVT      p7                 fcfids, fcfidu, fcfidus, fcfiduz, fctiwuz
 // FRE          p5 through p7      fre (vs. fres, available since p3)
 // FRSQRTES     p5 through p7      frsqrtes (vs. frsqrte, available since p3)
-// LFIWAX       p6, p6x, p7        lfiwax
 // LFIWZX       p7                 lfiwzx
 // POPCNTB      p5 through p7      popcntb and related instructions
 // RECIP_PREC   p6, p6x, p7        higher precision reciprocal estimates
@ -133,14 +134,15 @@ def : ProcessorModel<"e5500", PPCE5500Model,
                   FeatureSTFIWX, FeatureBookE, FeatureISEL]>;
 def : Processor<"a2", PPCA2Itineraries,
                  [DirectiveA2, FeatureBookE, FeatureMFOCRF,
-                   FeatureFSqrt, FeatureSTFIWX, FeatureFPRND,
-                   FeatureISEL, FeaturePOPCNTD, FeatureLDBRX,
-                   Feature64Bit /*, Feature64BitRegs */]>;
+                   FeatureFSqrt, FeatureSTFIWX, FeatureLFIWAX,
+                   FeatureFPRND, FeatureISEL, FeaturePOPCNTD,
+                   FeatureLDBRX, Feature64Bit /*, Feature64BitRegs */]>;
 def : Processor<"a2q", PPCA2Itineraries,
                  [DirectiveA2, FeatureBookE, FeatureMFOCRF,
-                   FeatureFSqrt, FeatureSTFIWX, FeatureFPRND,
-                   FeatureISEL, FeaturePOPCNTD, FeatureLDBRX,
-                   Feature64Bit /*, Feature64BitRegs */, FeatureQPX]>;
+                   FeatureFSqrt, FeatureSTFIWX, FeatureLFIWAX,
+                   FeatureFPRND, FeatureISEL, FeaturePOPCNTD,
+                   FeatureLDBRX, Feature64Bit /*, Feature64BitRegs */,
+                   FeatureQPX]>;
 def : Processor<"pwr3", G5Itineraries,
                  [DirectivePwr3, FeatureAltivec, FeatureMFOCRF,
                   FeatureSTFIWX, Feature64Bit]>;
@ -157,16 +159,18 @@ def : Processor<"pwr5x", G5Itineraries,
 def : Processor<"pwr6", G5Itineraries,
                  [DirectivePwr6, FeatureAltivec,
                   FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
-                   FeatureFPRND, Feature64Bit /*, Feature64BitRegs */]>;
+                   FeatureLFIWAX, FeatureFPRND, Feature64Bit
+               /*, Feature64BitRegs */]>;
 def : Processor<"pwr6x", G5Itineraries,
                  [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF,
-                   FeatureFSqrt, FeatureSTFIWX, FeatureFPRND,
-                   Feature64Bit]>;
+                   FeatureFSqrt, FeatureSTFIWX, FeatureLFIWAX,
+                   FeatureFPRND, Feature64Bit]>;
 def : Processor<"pwr7", G5Itineraries,
                  [DirectivePwr7, FeatureAltivec,
                   FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
-                   FeatureFPRND, FeatureISEL, FeaturePOPCNTD,
-                   FeatureLDBRX, Feature64Bit /*, Feature64BitRegs */]>;
+                   FeatureLFIWAX, FeatureFPRND, FeatureISEL,
+                   FeaturePOPCNTD, FeatureLDBRX, Feature64Bit
+               /*, Feature64BitRegs */]>;
 def : Processor<"ppc", G3Itineraries, [Directive32]>;
 def : Processor<"ppc64", G5Itineraries,
                  [Directive64, FeatureAltivec,
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@ -4809,20 +4809,43 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
  // then lfd it and fcfid it.
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *FrameInfo = MF.getFrameInfo();
-  int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
-  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

-  SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
-                              Op.getOperand(0));
+  SDValue Ld;
+  if (PPCSubTarget.hasLFIWAX()) {
+    int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
+    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

-  // STD the extended value into the stack slot.
-  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx,
-                               MachinePointerInfo(), false, false, 0);
+    SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
+                                 MachinePointerInfo::getFixedStack(FrameIdx),
+                                 false, false, 0);

-  // Load the value as a double.
-  SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, MachinePointerInfo(),
-                           false, false, false, 0);
+    assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
+           "Expected an i32 store");
+    MachineMemOperand *MMO =
+      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
+                              MachineMemOperand::MOLoad, 4, 4);
+    SDValue Ops[] = { Store, FIdx };
+    Ld = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
+                                 DAG.getVTList(MVT::f64, MVT::Other), Ops, 2,
+                                 MVT::i32, MMO);
+  } else {
+    int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
+    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+    SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
+                                Op.getOperand(0));
+
+    // STD the extended value into the stack slot.
+    SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx,
+                                 MachinePointerInfo::getFixedStack(FrameIdx),
+                                 false, false, 0);
+
+    // Load the value as a double.
+    Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx,
+                     MachinePointerInfo::getFixedStack(FrameIdx),
+                     false, false, false, 0);
+  }

  // FCFID it and return it.
  SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld);
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@ -242,6 +242,11 @@ namespace llvm {
      /// or i32.
      LBRX,

+      /// F8RC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point
+      /// load which sign-extends a 32-bit integer value from memory into
+      /// the destination 64-bit floating-point register.
+      LFIWAX,
+
      /// G8RC = ADDIS_TOC_HA %X2, Symbol - For medium and large code model,
      /// produces an ADDIS8 instruction that adds the TOC base register to
      /// sym@toc@ha.
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@ -20,6 +20,10 @@ include "PPCInstrFormats.td"
 def SDT_PPCstfiwx : SDTypeProfile<0, 2, [ // stfiwx
  SDTCisVT<0, f64>, SDTCisPtrTy<1>
 ]>;
+def SDT_PPClfiwax : SDTypeProfile<1, 1, [ // lfiwax
+  SDTCisVT<0, f64>, SDTCisPtrTy<1>
+]>;
+
 def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
 def SDT_PPCCallSeqEnd   : SDCallSeqEnd<[ SDTCisVT<0, i32>,
                                         SDTCisVT<1, i32> ]>;
@ -63,6 +67,8 @@ def PPCfctidz : SDNode<"PPCISD::FCTIDZ", SDTFPUnaryOp, []>;
 def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>;
 def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx,
                       [SDNPHasChain, SDNPMayStore]>;
+def PPClfiwax : SDNode<"PPCISD::LFIWAX", SDT_PPClfiwax,
+                       [SDNPHasChain, SDNPMayLoad]>;

 // Extract FPSCR (not modeled at the DAG level).
 def PPCmffs   : SDNode<"PPCISD::MFFS",
@ -843,6 +849,10 @@ def LFSX   : XForm_25<31, 535, (outs F4RC:$frD), (ins memrr:$src),
 def LFDX   : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src),
                      "lfdx $frD, $src", LdStLFD,
                      [(set f64:$frD, (load xaddr:$src))]>;
+
+def LFIWAX : XForm_25<31, 855, (outs F8RC:$frD), (ins memrr:$src),
+                      "lfiwax $frD, $src", LdStLFD,
+                      [(set f64:$frD, (PPClfiwax xoaddr:$src))]>;
 }

 //===----------------------------------------------------------------------===//
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@ -528,6 +528,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,

  bool noImmForm = false;
  switch (OpC) {
+  case PPC::LFIWAX:
  case PPC::LVEBX:
  case PPC::LVEHX:
  case PPC::LVEWX:
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@ -39,6 +39,7 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
  , HasQPX(false)
  , HasFSQRT(false)
  , HasSTFIWX(false)
+  , HasLFIWAX(false)
  , HasFPRND(false)
  , HasISEL(false)
  , HasPOPCNTD(false)
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h
@ -78,6 +78,7 @@ protected:
  bool HasQPX;
  bool HasFSQRT;
  bool HasSTFIWX;
+  bool HasLFIWAX;
  bool HasFPRND;
  bool HasISEL;
  bool HasPOPCNTD;
@ -158,6 +159,7 @@ public:
  // Specific obvious features.
  bool hasFSQRT() const { return HasFSQRT; }
  bool hasSTFIWX() const { return HasSTFIWX; }
+  bool hasLFIWAX() const { return HasLFIWAX; }
  bool hasFPRND() const { return HasFPRND; }
  bool hasAltivec() const { return HasAltivec; }
  bool hasQPX() const { return HasQPX; }
--- a/llvm/test/CodeGen/PowerPC/i32-to-float.ll
+++ b/llvm/test/CodeGen/PowerPC/i32-to-float.ll
@ -1,4 +1,5 @@
 ; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g5 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck -check-prefix=CHECK-A2 %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"

@ -14,6 +15,13 @@ entry:
 ; CHECK: fcfid [[REG3:[0-9]+]], [[REG2]]
 ; CHECK: frsp 1, [[REG3]]
 ; CHECK: blr
+
+; CHECK-A2: @foo
+; CHECK-A2: stw 3,
+; CHECK-A2: lfiwax [[REG:[0-9]+]],
+; CHECK-A2: fcfid [[REG2:[0-9]+]], [[REG]]
+; CHECK-A2: frsp 1, [[REG2]]
+; CHECK-A2: blr
 }

 define double @goo(i32 %a) nounwind {
@ -27,5 +35,11 @@ entry:
 ; CHECK: lfd [[REG2:[0-9]+]],
 ; CHECK: fcfid 1, [[REG2]]
 ; CHECK: blr
+
+; CHECK-A2: @goo
+; CHECK-A2: stw 3,
+; CHECK-A2: lfiwax [[REG:[0-9]+]],
+; CHECK-A2: fcfid 1, [[REG]]
+; CHECK-A2: blr
 }