Large stack frame patch for the CellSPU: handle stack frames that exceed 8176

(511*16) bytes register displacement (D-form). NOTE: This is a potential headache, given the SPU's local core limitations, allowing the software developer to commit stack overrun suicide unknowingly. Also, large SPU stack frames will cause code size explosion. But, one presumes that the software developer knows what they're doing... Contributed by Kalle.Raiskila@nokia.com, edited slightly before commit. llvm-svn: 97091
2010-02-25 01:53:17 +00:00 · 2010-02-25 01:53:17 +00:00 · 0ab0ad93bd
parent 65eb879d22
commit 0ab0ad93bd
3 changed files with 150 additions and 11 deletions
--- a/llvm/lib/Target/CellSPU/SPU.h
+++ b/llvm/lib/Target/CellSPU/SPU.h
@ -66,9 +66,6 @@ namespace llvm {
  //! Predicate test for an unsigned 10-bit value
  /*!
    \param Value The input value to be tested
-
-    This predicate tests for an unsigned 10-bit value, returning the 10-bit value
-    as a short if true.
   */
  inline bool isU10Constant(short Value) {
    return (Value == (Value & 0x3ff));
@ -90,6 +87,70 @@ namespace llvm {
    return (Value == (Value & 0x3ff));
  }

+  //! Predicate test for a signed 14-bit value
+  /*!
+    \param Value The input value to be tested
+   */
+  template<typename T>
+  inline bool isS14Constant(T Value);
+
+  template<>
+  inline bool isS14Constant<short>(short Value) {
+    return (Value >= -(1 << 13) && Value <= (1 << 13) - 1);
+  }
+
+  template<>
+  inline bool isS14Constant<int>(int Value) {
+    return (Value >= -(1 << 13) && Value <= (1 << 13) - 1);
+  }
+
+  template<>
+  inline bool isS14Constant<uint32_t>(uint32_t Value) {
+    return (Value <= ((1 << 13) - 1));
+  }
+
+  template<>
+  inline bool isS14Constant<int64_t>(int64_t Value) {
+    return (Value >= -(1 << 13) && Value <= (1 << 13) - 1);
+  }
+
+  template<>
+  inline bool isS14Constant<uint64_t>(uint64_t Value) {
+    return (Value <= ((1 << 13) - 1));
+  }
+
+  //! Predicate test for a signed 16-bit value
+  /*!
+    \param Value The input value to be tested
+   */
+  template<typename T>
+  inline bool isS16Constant(T Value);
+
+  template<>
+  inline bool isS16Constant<short>(short Value) {
+    return true;
+  }
+
+  template<>
+  inline bool isS16Constant<int>(int Value) {
+    return (Value >= -(1 << 15) && Value <= (1 << 15) - 1);
+  }
+
+  template<>
+  inline bool isS16Constant<uint32_t>(uint32_t Value) {
+    return (Value <= ((1 << 15) - 1));
+  }
+
+  template<>
+  inline bool isS16Constant<int64_t>(int64_t Value) {
+    return (Value >= -(1 << 15) && Value <= (1 << 15) - 1);
+  }
+
+  template<>
+  inline bool isS16Constant<uint64_t>(uint64_t Value) {
+    return (Value <= ((1 << 15) - 1));
+  }
+
  extern Target TheCellSPUTarget;

 }
--- a/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp
+++ b/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp
@ -28,6 +28,7 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineLocation.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/Target/TargetFrameInfo.h"
 #include "llvm/Target/TargetInstrInfo.h"
@ -335,6 +336,7 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
  MachineBasicBlock &MBB = *MI.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo *MFI = MF.getFrameInfo();
+  DebugLoc dl = II->getDebugLoc();

  while (!MI.getOperand(i).isFI()) {
    ++i;
@ -363,11 +365,22 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,

  // Replace the FrameIndex with base register with $sp (aka $r1)
  SPOp.ChangeToRegister(SPU::R1, false);
-  if (Offset > SPUFrameInfo::maxFrameOffset()
-      || Offset < SPUFrameInfo::minFrameOffset()) {
-    errs() << "Large stack adjustment ("
-         << Offset
-         << ") in SPURegisterInfo::eliminateFrameIndex.";
+
+  // if 'Offset' doesn't fit to the D-form instruction's
+  // immediate, convert the instruction to X-form
+  // if the instruction is not an AI (which takes a s10 immediate), assume
+  // it is a load/store that can take a s14 immediate
+  if ( (MI.getOpcode() == SPU::AIr32 && !isS10Constant(Offset))
+       || !isS14Constant(Offset) ) {
+    int newOpcode = convertDFormToXForm(MI.getOpcode());
+    unsigned tmpReg = findScratchRegister(II, RS, &SPU::R32CRegClass, SPAdj);
+    BuildMI(MBB, II, dl, TII.get(SPU::ILr32), tmpReg )
+        .addImm(Offset);
+    BuildMI(MBB, II, dl, TII.get(newOpcode), MI.getOperand(0).getReg())
+        .addReg(tmpReg, RegState::Kill)
+        .addReg(SPU::R1);
+    // remove the replaced D-form instruction
+    MBB.erase(II);
  } else {
    MO.ChangeToImmediate(Offset);
  }
@ -422,6 +435,14 @@ void SPURegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
  MF.getRegInfo().setPhysRegUnused(SPU::R0);
  MF.getRegInfo().setPhysRegUnused(SPU::R1);
  MF.getRegInfo().setPhysRegUnused(SPU::R2);
+
+  MachineFrameInfo *MFI = MF.getFrameInfo(); 
+  const TargetRegisterClass *RC = &SPU::R32CRegClass;
+  RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+                                                     RC->getAlignment(),
+                                                     false));
+  
+  
 }

 void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
@ -466,7 +487,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
      // Adjust $sp by required amout
      BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1).addReg(SPU::R1)
        .addImm(FrameSize);
-    } else if (FrameSize <= (1 << 16) - 1 && FrameSize >= -(1 << 16)) {
+    } else if (isS16Constant(FrameSize)) {
      // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use
      // $r2 to adjust $sp:
      BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2)
@ -474,7 +495,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
        .addReg(SPU::R1);
      BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2)
        .addImm(FrameSize);
-      BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R1)
+      BuildMI(MBB, MBBI, dl, TII.get(SPU::STQXr32), SPU::R1)
        .addReg(SPU::R2)
        .addReg(SPU::R1);
      BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1)
@ -573,7 +594,7 @@ SPURegisterInfo::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
        .addReg(SPU::R2);
      BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0)
        .addImm(16)
-        .addReg(SPU::R2);
+        .addReg(SPU::R1);
      BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2).
        addReg(SPU::R2)
        .addImm(16);
@ -617,4 +638,42 @@ SPURegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
  return SPUGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
 }

+int 
+SPURegisterInfo::convertDFormToXForm(int dFormOpcode) const
+{
+  switch(dFormOpcode) 
+  {
+    case SPU::AIr32:     return SPU::Ar32;
+    case SPU::LQDr32:    return SPU::LQXr32;
+    case SPU::LQDr128:   return SPU::LQXr128;
+    case SPU::LQDv16i8:  return SPU::LQXv16i8;
+    case SPU::LQDv4f32:  return SPU::LQXv4f32;
+    case SPU::STQDr32:   return SPU::STQXr32;
+    case SPU::STQDr128:  return SPU::STQXr128;
+    case SPU::STQDv4i32: return SPU::STQXv4i32;
+    case SPU::STQDv4f32: return SPU::STQXv4f32;
+
+    default: assert( false && "Unhandled D to X-form conversion");
+  }
+  // default will assert, but need to return something to keep the
+  // compiler happy.
+  return dFormOpcode;
+}
+
+// TODO this is already copied from PPC. Could this convenience function
+// be moved to the RegScavenger class?
+unsigned  
+SPURegisterInfo::findScratchRegister(MachineBasicBlock::iterator II, 
+                                     RegScavenger *RS,
+                                     const TargetRegisterClass *RC, 
+                                     int SPAdj) const
+{
+  assert(RS && "Register scavenging must be on");
+  unsigned Reg = RS->FindUnusedReg(RC);
+  if (Reg == 0)
+    Reg = RS->scavengeRegister(RC, II, SPAdj);
+  assert( Reg && "Register scavenger failed");
+  return Reg;
+}
+
 #include "SPUGenRegisterInfo.inc"
--- a/llvm/lib/Target/CellSPU/SPURegisterInfo.h
+++ b/llvm/lib/Target/CellSPU/SPURegisterInfo.h
@ -53,6 +53,10 @@ namespace llvm {
    virtual const TargetRegisterClass* const *
      getCalleeSavedRegClasses(const MachineFunction *MF) const;

+    //! Allow for scavenging, so we can get scratch registers when needed.
+    virtual bool requiresRegisterScavenging(const MachineFunction &MF) const
+    { return true; }
+
    //! Return the reserved registers
    BitVector getReservedRegs(const MachineFunction &MF) const;

@ -97,6 +101,21 @@ namespace llvm {

    //! Get DWARF debugging register number
    int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+
+    //! Convert D-form load/store to X-form load/store
+    /*!
+      Converts a regiser displacement load/store into a register-indexed
+      load/store for large stack frames, when the stack frame exceeds the
+      range of a s10 displacement.
+     */
+    int convertDFormToXForm(int dFormOpcode) const;
+
+    //! Acquire an unused register in an emergency.
+    unsigned findScratchRegister(MachineBasicBlock::iterator II,
+                                 RegScavenger *RS,
+                                 const TargetRegisterClass *RC, 
+                                 int SPAdj) const;
+    
  };
 } // end namespace llvm