Large stack frame patch for the CellSPU: handle stack frames that exceed 8176

(511*16) bytes register displacement (D-form).

NOTE: This is a potential headache, given the SPU's local core limitations,
allowing the software developer to commit stack overrun suicide unknowingly.
Also, large SPU stack frames will cause code size explosion. But, one presumes
that the software developer knows what they're doing...

Contributed by Kalle.Raiskila@nokia.com, edited slightly before commit.

llvm-svn: 97091
This commit is contained in:
Scott Michel 2010-02-25 01:53:17 +00:00
parent 65eb879d22
commit 0ab0ad93bd
3 changed files with 150 additions and 11 deletions

View File

@ -66,9 +66,6 @@ namespace llvm {
//! Predicate test for an unsigned 10-bit value
/*!
\param Value The input value to be tested
This predicate tests for an unsigned 10-bit value, returning the 10-bit value
as a short if true.
*/
inline bool isU10Constant(short Value) {
return (Value == (Value & 0x3ff));
@ -90,6 +87,70 @@ namespace llvm {
return (Value == (Value & 0x3ff));
}
//! Predicate test for a signed 14-bit value
/*!
\param Value The input value to be tested
*/
template<typename T>
inline bool isS14Constant(T Value);
template<>
inline bool isS14Constant<short>(short Value) {
return (Value >= -(1 << 13) && Value <= (1 << 13) - 1);
}
template<>
inline bool isS14Constant<int>(int Value) {
return (Value >= -(1 << 13) && Value <= (1 << 13) - 1);
}
template<>
inline bool isS14Constant<uint32_t>(uint32_t Value) {
return (Value <= ((1 << 13) - 1));
}
template<>
inline bool isS14Constant<int64_t>(int64_t Value) {
return (Value >= -(1 << 13) && Value <= (1 << 13) - 1);
}
template<>
inline bool isS14Constant<uint64_t>(uint64_t Value) {
return (Value <= ((1 << 13) - 1));
}
//! Predicate test for a signed 16-bit value
/*!
\param Value The input value to be tested
*/
template<typename T>
inline bool isS16Constant(T Value);
template<>
inline bool isS16Constant<short>(short Value) {
return true;
}
template<>
inline bool isS16Constant<int>(int Value) {
return (Value >= -(1 << 15) && Value <= (1 << 15) - 1);
}
template<>
inline bool isS16Constant<uint32_t>(uint32_t Value) {
return (Value <= ((1 << 15) - 1));
}
template<>
inline bool isS16Constant<int64_t>(int64_t Value) {
return (Value >= -(1 << 15) && Value <= (1 << 15) - 1);
}
template<>
inline bool isS16Constant<uint64_t>(uint64_t Value) {
return (Value <= ((1 << 15) - 1));
}
extern Target TheCellSPUTarget;
}

View File

@ -28,6 +28,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/Target/TargetFrameInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
@ -335,6 +336,7 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo *MFI = MF.getFrameInfo();
DebugLoc dl = II->getDebugLoc();
while (!MI.getOperand(i).isFI()) {
++i;
@ -363,11 +365,22 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
// Replace the FrameIndex with base register with $sp (aka $r1)
SPOp.ChangeToRegister(SPU::R1, false);
if (Offset > SPUFrameInfo::maxFrameOffset()
|| Offset < SPUFrameInfo::minFrameOffset()) {
errs() << "Large stack adjustment ("
<< Offset
<< ") in SPURegisterInfo::eliminateFrameIndex.";
// if 'Offset' doesn't fit to the D-form instruction's
// immediate, convert the instruction to X-form
// if the instruction is not an AI (which takes a s10 immediate), assume
// it is a load/store that can take a s14 immediate
if ( (MI.getOpcode() == SPU::AIr32 && !isS10Constant(Offset))
|| !isS14Constant(Offset) ) {
int newOpcode = convertDFormToXForm(MI.getOpcode());
unsigned tmpReg = findScratchRegister(II, RS, &SPU::R32CRegClass, SPAdj);
BuildMI(MBB, II, dl, TII.get(SPU::ILr32), tmpReg )
.addImm(Offset);
BuildMI(MBB, II, dl, TII.get(newOpcode), MI.getOperand(0).getReg())
.addReg(tmpReg, RegState::Kill)
.addReg(SPU::R1);
// remove the replaced D-form instruction
MBB.erase(II);
} else {
MO.ChangeToImmediate(Offset);
}
@ -422,6 +435,14 @@ void SPURegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
MF.getRegInfo().setPhysRegUnused(SPU::R0);
MF.getRegInfo().setPhysRegUnused(SPU::R1);
MF.getRegInfo().setPhysRegUnused(SPU::R2);
MachineFrameInfo *MFI = MF.getFrameInfo();
const TargetRegisterClass *RC = &SPU::R32CRegClass;
RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
RC->getAlignment(),
false));
}
void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
@ -466,7 +487,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
// Adjust $sp by required amout
BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1).addReg(SPU::R1)
.addImm(FrameSize);
} else if (FrameSize <= (1 << 16) - 1 && FrameSize >= -(1 << 16)) {
} else if (isS16Constant(FrameSize)) {
// Frame size can be loaded into ILr32n, so temporarily spill $r2 and use
// $r2 to adjust $sp:
BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2)
@ -474,7 +495,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
.addReg(SPU::R1);
BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2)
.addImm(FrameSize);
BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R1)
BuildMI(MBB, MBBI, dl, TII.get(SPU::STQXr32), SPU::R1)
.addReg(SPU::R2)
.addReg(SPU::R1);
BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1)
@ -573,7 +594,7 @@ SPURegisterInfo::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
.addReg(SPU::R2);
BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0)
.addImm(16)
.addReg(SPU::R2);
.addReg(SPU::R1);
BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2).
addReg(SPU::R2)
.addImm(16);
@ -617,4 +638,42 @@ SPURegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
return SPUGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
}
int
SPURegisterInfo::convertDFormToXForm(int dFormOpcode) const
{
switch(dFormOpcode)
{
case SPU::AIr32: return SPU::Ar32;
case SPU::LQDr32: return SPU::LQXr32;
case SPU::LQDr128: return SPU::LQXr128;
case SPU::LQDv16i8: return SPU::LQXv16i8;
case SPU::LQDv4f32: return SPU::LQXv4f32;
case SPU::STQDr32: return SPU::STQXr32;
case SPU::STQDr128: return SPU::STQXr128;
case SPU::STQDv4i32: return SPU::STQXv4i32;
case SPU::STQDv4f32: return SPU::STQXv4f32;
default: assert( false && "Unhandled D to X-form conversion");
}
// default will assert, but need to return something to keep the
// compiler happy.
return dFormOpcode;
}
// TODO this is already copied from PPC. Could this convenience function
// be moved to the RegScavenger class?
unsigned
SPURegisterInfo::findScratchRegister(MachineBasicBlock::iterator II,
RegScavenger *RS,
const TargetRegisterClass *RC,
int SPAdj) const
{
assert(RS && "Register scavenging must be on");
unsigned Reg = RS->FindUnusedReg(RC);
if (Reg == 0)
Reg = RS->scavengeRegister(RC, II, SPAdj);
assert( Reg && "Register scavenger failed");
return Reg;
}
#include "SPUGenRegisterInfo.inc"

View File

@ -53,6 +53,10 @@ namespace llvm {
virtual const TargetRegisterClass* const *
getCalleeSavedRegClasses(const MachineFunction *MF) const;
//! Allow for scavenging, so we can get scratch registers when needed.
virtual bool requiresRegisterScavenging(const MachineFunction &MF) const
{ return true; }
//! Return the reserved registers
BitVector getReservedRegs(const MachineFunction &MF) const;
@ -97,6 +101,21 @@ namespace llvm {
//! Get DWARF debugging register number
int getDwarfRegNum(unsigned RegNum, bool isEH) const;
//! Convert D-form load/store to X-form load/store
/*!
Converts a regiser displacement load/store into a register-indexed
load/store for large stack frames, when the stack frame exceeds the
range of a s10 displacement.
*/
int convertDFormToXForm(int dFormOpcode) const;
//! Acquire an unused register in an emergency.
unsigned findScratchRegister(MachineBasicBlock::iterator II,
RegScavenger *RS,
const TargetRegisterClass *RC,
int SPAdj) const;
};
} // end namespace llvm