From 0ab0ad93bd13efc92fa87e6e23ad12e5c4421147 Mon Sep 17 00:00:00 2001 From: Scott Michel Date: Thu, 25 Feb 2010 01:53:17 +0000 Subject: [PATCH] Large stack frame patch for the CellSPU: handle stack frames that exceed 8176 (511*16) bytes register displacement (D-form). NOTE: This is a potential headache, given the SPU's local core limitations, allowing the software developer to commit stack overrun suicide unknowingly. Also, large SPU stack frames will cause code size explosion. But, one presumes that the software developer knows what they're doing... Contributed by Kalle.Raiskila@nokia.com, edited slightly before commit. llvm-svn: 97091 --- llvm/lib/Target/CellSPU/SPU.h | 67 +++++++++++++++++- llvm/lib/Target/CellSPU/SPURegisterInfo.cpp | 75 ++++++++++++++++++--- llvm/lib/Target/CellSPU/SPURegisterInfo.h | 19 ++++++ 3 files changed, 150 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/CellSPU/SPU.h b/llvm/lib/Target/CellSPU/SPU.h index c96097494ea3..c6208121902c 100644 --- a/llvm/lib/Target/CellSPU/SPU.h +++ b/llvm/lib/Target/CellSPU/SPU.h @@ -66,9 +66,6 @@ namespace llvm { //! Predicate test for an unsigned 10-bit value /*! \param Value The input value to be tested - - This predicate tests for an unsigned 10-bit value, returning the 10-bit value - as a short if true. */ inline bool isU10Constant(short Value) { return (Value == (Value & 0x3ff)); @@ -90,6 +87,70 @@ namespace llvm { return (Value == (Value & 0x3ff)); } + //! Predicate test for a signed 14-bit value + /*! + \param Value The input value to be tested + */ + template + inline bool isS14Constant(T Value); + + template<> + inline bool isS14Constant(short Value) { + return (Value >= -(1 << 13) && Value <= (1 << 13) - 1); + } + + template<> + inline bool isS14Constant(int Value) { + return (Value >= -(1 << 13) && Value <= (1 << 13) - 1); + } + + template<> + inline bool isS14Constant(uint32_t Value) { + return (Value <= ((1 << 13) - 1)); + } + + template<> + inline bool isS14Constant(int64_t Value) { + return (Value >= -(1 << 13) && Value <= (1 << 13) - 1); + } + + template<> + inline bool isS14Constant(uint64_t Value) { + return (Value <= ((1 << 13) - 1)); + } + + //! Predicate test for a signed 16-bit value + /*! + \param Value The input value to be tested + */ + template + inline bool isS16Constant(T Value); + + template<> + inline bool isS16Constant(short Value) { + return true; + } + + template<> + inline bool isS16Constant(int Value) { + return (Value >= -(1 << 15) && Value <= (1 << 15) - 1); + } + + template<> + inline bool isS16Constant(uint32_t Value) { + return (Value <= ((1 << 15) - 1)); + } + + template<> + inline bool isS16Constant(int64_t Value) { + return (Value >= -(1 << 15) && Value <= (1 << 15) - 1); + } + + template<> + inline bool isS16Constant(uint64_t Value) { + return (Value <= ((1 << 15) - 1)); + } + extern Target TheCellSPUTarget; } diff --git a/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp b/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp index af94e67bbdce..1c36cdbbfd20 100644 --- a/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp +++ b/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp @@ -28,6 +28,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineLocation.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetInstrInfo.h" @@ -335,6 +336,7 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); MachineFrameInfo *MFI = MF.getFrameInfo(); + DebugLoc dl = II->getDebugLoc(); while (!MI.getOperand(i).isFI()) { ++i; @@ -363,11 +365,22 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, // Replace the FrameIndex with base register with $sp (aka $r1) SPOp.ChangeToRegister(SPU::R1, false); - if (Offset > SPUFrameInfo::maxFrameOffset() - || Offset < SPUFrameInfo::minFrameOffset()) { - errs() << "Large stack adjustment (" - << Offset - << ") in SPURegisterInfo::eliminateFrameIndex."; + + // if 'Offset' doesn't fit to the D-form instruction's + // immediate, convert the instruction to X-form + // if the instruction is not an AI (which takes a s10 immediate), assume + // it is a load/store that can take a s14 immediate + if ( (MI.getOpcode() == SPU::AIr32 && !isS10Constant(Offset)) + || !isS14Constant(Offset) ) { + int newOpcode = convertDFormToXForm(MI.getOpcode()); + unsigned tmpReg = findScratchRegister(II, RS, &SPU::R32CRegClass, SPAdj); + BuildMI(MBB, II, dl, TII.get(SPU::ILr32), tmpReg ) + .addImm(Offset); + BuildMI(MBB, II, dl, TII.get(newOpcode), MI.getOperand(0).getReg()) + .addReg(tmpReg, RegState::Kill) + .addReg(SPU::R1); + // remove the replaced D-form instruction + MBB.erase(II); } else { MO.ChangeToImmediate(Offset); } @@ -422,6 +435,14 @@ void SPURegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, MF.getRegInfo().setPhysRegUnused(SPU::R0); MF.getRegInfo().setPhysRegUnused(SPU::R1); MF.getRegInfo().setPhysRegUnused(SPU::R2); + + MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetRegisterClass *RC = &SPU::R32CRegClass; + RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), + RC->getAlignment(), + false)); + + } void SPURegisterInfo::emitPrologue(MachineFunction &MF) const @@ -466,7 +487,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const // Adjust $sp by required amout BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1).addReg(SPU::R1) .addImm(FrameSize); - } else if (FrameSize <= (1 << 16) - 1 && FrameSize >= -(1 << 16)) { + } else if (isS16Constant(FrameSize)) { // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use // $r2 to adjust $sp: BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2) @@ -474,7 +495,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const .addReg(SPU::R1); BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2) .addImm(FrameSize); - BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R1) + BuildMI(MBB, MBBI, dl, TII.get(SPU::STQXr32), SPU::R1) .addReg(SPU::R2) .addReg(SPU::R1); BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1) @@ -573,7 +594,7 @@ SPURegisterInfo::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const .addReg(SPU::R2); BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0) .addImm(16) - .addReg(SPU::R2); + .addReg(SPU::R1); BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2). addReg(SPU::R2) .addImm(16); @@ -617,4 +638,42 @@ SPURegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { return SPUGenRegisterInfo::getDwarfRegNumFull(RegNum, 0); } +int +SPURegisterInfo::convertDFormToXForm(int dFormOpcode) const +{ + switch(dFormOpcode) + { + case SPU::AIr32: return SPU::Ar32; + case SPU::LQDr32: return SPU::LQXr32; + case SPU::LQDr128: return SPU::LQXr128; + case SPU::LQDv16i8: return SPU::LQXv16i8; + case SPU::LQDv4f32: return SPU::LQXv4f32; + case SPU::STQDr32: return SPU::STQXr32; + case SPU::STQDr128: return SPU::STQXr128; + case SPU::STQDv4i32: return SPU::STQXv4i32; + case SPU::STQDv4f32: return SPU::STQXv4f32; + + default: assert( false && "Unhandled D to X-form conversion"); + } + // default will assert, but need to return something to keep the + // compiler happy. + return dFormOpcode; +} + +// TODO this is already copied from PPC. Could this convenience function +// be moved to the RegScavenger class? +unsigned +SPURegisterInfo::findScratchRegister(MachineBasicBlock::iterator II, + RegScavenger *RS, + const TargetRegisterClass *RC, + int SPAdj) const +{ + assert(RS && "Register scavenging must be on"); + unsigned Reg = RS->FindUnusedReg(RC); + if (Reg == 0) + Reg = RS->scavengeRegister(RC, II, SPAdj); + assert( Reg && "Register scavenger failed"); + return Reg; +} + #include "SPUGenRegisterInfo.inc" diff --git a/llvm/lib/Target/CellSPU/SPURegisterInfo.h b/llvm/lib/Target/CellSPU/SPURegisterInfo.h index 9691cb657030..883bf5e328d4 100644 --- a/llvm/lib/Target/CellSPU/SPURegisterInfo.h +++ b/llvm/lib/Target/CellSPU/SPURegisterInfo.h @@ -53,6 +53,10 @@ namespace llvm { virtual const TargetRegisterClass* const * getCalleeSavedRegClasses(const MachineFunction *MF) const; + //! Allow for scavenging, so we can get scratch registers when needed. + virtual bool requiresRegisterScavenging(const MachineFunction &MF) const + { return true; } + //! Return the reserved registers BitVector getReservedRegs(const MachineFunction &MF) const; @@ -97,6 +101,21 @@ namespace llvm { //! Get DWARF debugging register number int getDwarfRegNum(unsigned RegNum, bool isEH) const; + + //! Convert D-form load/store to X-form load/store + /*! + Converts a regiser displacement load/store into a register-indexed + load/store for large stack frames, when the stack frame exceeds the + range of a s10 displacement. + */ + int convertDFormToXForm(int dFormOpcode) const; + + //! Acquire an unused register in an emergency. + unsigned findScratchRegister(MachineBasicBlock::iterator II, + RegScavenger *RS, + const TargetRegisterClass *RC, + int SPAdj) const; + }; } // end namespace llvm