Compile this:

void foo(float a, int *b) { *b = a; } to this: _foo: fctiwz f0, f1 stfiwx f0, 0, r4 blr instead of this: _foo: fctiwz f0, f1 stfd f0, -8(r1) lwz r2, -4(r1) stw r2, 0(r4) blr This implements CodeGen/PowerPC/stfiwx.ll, and also incidentally does the right thing for GCC bugzilla 26505. llvm-svn: 26447
2006-03-01 05:50:56 +00:00 · 2006-03-01 05:50:56 +00:00 · 27f5345b1f
parent 160cc92461
commit 27f5345b1f
5 changed files with 42 additions and 13 deletions
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@ -187,6 +187,7 @@ PPCTargetLowering::PPCTargetLowering(TargetMachine &TM)
  
  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::SINT_TO_FP);
+  setTargetDAGCombine(ISD::STORE);
  
  computeRegisterProperties();
 }
@ -198,6 +199,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  case PPCISD::FCFID:         return "PPCISD::FCFID";
  case PPCISD::FCTIDZ:        return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ:        return "PPCISD::FCTIWZ";
+  case PPCISD::STFIWX:        return "PPCISD::STFIWX";
  case PPCISD::VMADDFP:       return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP:      return "PPCISD::VNMSUBFP";
  case PPCISD::Hi:            return "PPCISD::Hi";
@ -1032,6 +1034,25 @@ SDOperand PPCTargetLowering::PerformDAGCombine(SDNode *N,
      }
    }
    break;
+  case ISD::STORE:
+    // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
+    if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
+        N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
+        N->getOperand(1).getValueType() == MVT::i32) {
+      SDOperand Val = N->getOperand(1).getOperand(0);
+      if (Val.getValueType() == MVT::f32) {
+        Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);
+        DCI.AddToWorklist(Val.Val);
+      }
+      Val = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Val);
+      DCI.AddToWorklist(Val.Val);
+
+      Val = DAG.getNode(PPCISD::STFIWX, MVT::Other, N->getOperand(0), Val,
+                        N->getOperand(2), N->getOperand(3));
+      DCI.AddToWorklist(Val.Val);
+      return Val;
+    }
+    break;
  }
  
  return SDOperand();
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@ -39,6 +39,11 @@ namespace llvm {
      /// of that FP value.
      FCTIDZ, FCTIWZ,
      
+      /// STFIWX - The STFIWX instruction.  The first operand is an input token
+      /// chain, then an f64 value to store, then an address to store it to,
+      /// then a SRCVALUE for the address.
+      STFIWX,
+      
      // VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking
      // three v4f32 operands and producing a v4f32 result.
      VMADDFP, VNMSUBFP,
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@ -14,6 +14,18 @@

 include "PPCInstrFormats.td"

+//===----------------------------------------------------------------------===//
+// PowerPC specific type constraints.
+//
+def SDT_PPCstfiwx : SDTypeProfile<0, 2, [ // stfiwx
+  SDTCisVT<0, f64>, SDTCisPtrTy<1>
+]>;
+def SDT_PPCShiftOp : SDTypeProfile<1, 2, [   // PPCshl, PPCsra, PPCsrl
+  SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>
+]>;
+def SDT_PPCCallSeq : SDTypeProfile<0, 1, [ SDTCisVT<0, i32> ]>;
+def SDT_PPCRetFlag : SDTypeProfile<0, 0, []>;
+
 //===----------------------------------------------------------------------===//
 // PowerPC specific DAG Nodes.
 //
@ -21,6 +33,7 @@ include "PPCInstrFormats.td"
 def PPCfcfid  : SDNode<"PPCISD::FCFID" , SDTFPUnaryOp, []>;
 def PPCfctidz : SDNode<"PPCISD::FCTIDZ", SDTFPUnaryOp, []>;
 def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>;
+def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx, [SDNPHasChain]>;

 def PPCfsel   : SDNode<"PPCISD::FSEL",  
   // Type constraint for fsel.
@ -34,19 +47,14 @@ def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>;

 // These nodes represent the 32-bit PPC shifts that operate on 6-bit shift
 // amounts.  These nodes are generated by the multi-precision shift code.
-def SDT_PPCShiftOp : SDTypeProfile<1, 2, [   // PPCshl, PPCsra, PPCsrl
-  SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>
-]>;
 def PPCsrl        : SDNode<"PPCISD::SRL"       , SDT_PPCShiftOp>;
 def PPCsra        : SDNode<"PPCISD::SRA"       , SDT_PPCShiftOp>;
 def PPCshl        : SDNode<"PPCISD::SHL"       , SDT_PPCShiftOp>;

 // These are target-independent nodes, but have target-specific formats.
-def SDT_PPCCallSeq : SDTypeProfile<0, 1, [ SDTCisVT<0, i32> ]>;
 def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeq,[SDNPHasChain]>;
 def callseq_end   : SDNode<"ISD::CALLSEQ_END",   SDT_PPCCallSeq,[SDNPHasChain]>;

-def SDT_PPCRetFlag : SDTypeProfile<0, 0, []>;
 def retflag       : SDNode<"PPCISD::RET_FLAG", SDT_PPCRetFlag,
 	                   [SDNPHasChain, SDNPOptInFlag]>;

@ -636,9 +644,9 @@ def FNEGD  : XForm_26<63, 40, (ops F8RC:$frD, F8RC:$frB),
                      
                      
 let isStore = 1, noResults = 1 in {
-def STFIWX: XForm_28<31, 983, (ops F4RC:$frS, memrr:$dst),
+def STFIWX: XForm_28<31, 983, (ops F8RC:$frS, memrr:$dst),
                     "stfiwx $frS, $dst", LdStUX,
-                     []>;
+                     [(PPCstfiwx F8RC:$frS, xoaddr:$dst)]>;
 def STFSX : XForm_28<31, 663, (ops F4RC:$frS, memrr:$dst),
                     "stfsx $frS, $dst", LdStUX,
                     [(store F4RC:$frS, xaddr:$dst)]>;
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@ -77,6 +77,7 @@ PPCSubtarget::PPCSubtarget(const Module &M, const std::string &FS)
  , Has64BitRegs(false)
  , HasAltivec(false)
  , HasFSQRT(false)
+  , HasSTFIWX(false)
  , IsAIX(false)
  , IsDarwin(false) {

--- a/llvm/lib/Target/PowerPC/README.txt
+++ b/llvm/lib/Target/PowerPC/README.txt
@ -5,12 +5,6 @@ TODO:

 ===-------------------------------------------------------------------------===

-Use the stfiwx instruction for:
-
-void foo(float a, int *b) { *b = a; }
-
-===-------------------------------------------------------------------------===
-
 Support 'update' load/store instructions.  These are cracked on the G5, but are
 still a codesize win.