forked from OSchip/llvm-project
For ARM stack frames that utilize variable-sized objects and either have
large local stack areas or require dynamic stack realignment, allocate a base register through which to access the local frame. This allows efficient access to frame indices not accessible via the FP (either because they are out of range or because of dynamic realignment) or the SP (because of variable-sized object allocation). In particular, this greatly improves the efficiency of access to spill slots in Thumb functions that contain VLAs. rdar://7352504 rdar://8374540 rdar://8355680 llvm-svn: 112883
This commit is contained in:
parent
310083c3e3
commit
7fd9aea67c
|
@ -50,6 +50,10 @@ EnableLocalStackAlloc("enable-local-stack-alloc", cl::init(true), cl::Hidden,
|
||||||
|
|
||||||
using namespace llvm;
|
using namespace llvm;
|
||||||
|
|
||||||
|
static cl::opt<bool>
|
||||||
|
EnableBasePointer("arm-use-base-pointer", cl::Hidden, cl::init(true),
|
||||||
|
cl::desc("Enable use of a base pointer for complex stack frames"));
|
||||||
|
|
||||||
unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum,
|
unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum,
|
||||||
bool *isSPVFP) {
|
bool *isSPVFP) {
|
||||||
if (isSPVFP)
|
if (isSPVFP)
|
||||||
|
@ -146,7 +150,8 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
|
||||||
const ARMSubtarget &sti)
|
const ARMSubtarget &sti)
|
||||||
: ARMGenRegisterInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
|
: ARMGenRegisterInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
|
||||||
TII(tii), STI(sti),
|
TII(tii), STI(sti),
|
||||||
FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11) {
|
FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11),
|
||||||
|
BasePtr(ARM::R6) {
|
||||||
}
|
}
|
||||||
|
|
||||||
const unsigned*
|
const unsigned*
|
||||||
|
@ -182,6 +187,8 @@ getReservedRegs(const MachineFunction &MF) const {
|
||||||
Reserved.set(ARM::FPSCR);
|
Reserved.set(ARM::FPSCR);
|
||||||
if (hasFP(MF))
|
if (hasFP(MF))
|
||||||
Reserved.set(FramePtr);
|
Reserved.set(FramePtr);
|
||||||
|
if (hasBasePointer(MF))
|
||||||
|
Reserved.set(BasePtr);
|
||||||
// Some targets reserve R9.
|
// Some targets reserve R9.
|
||||||
if (STI.isR9Reserved())
|
if (STI.isR9Reserved())
|
||||||
Reserved.set(ARM::R9);
|
Reserved.set(ARM::R9);
|
||||||
|
@ -195,6 +202,10 @@ bool ARMBaseRegisterInfo::isReservedReg(const MachineFunction &MF,
|
||||||
case ARM::SP:
|
case ARM::SP:
|
||||||
case ARM::PC:
|
case ARM::PC:
|
||||||
return true;
|
return true;
|
||||||
|
case ARM::R6:
|
||||||
|
if (hasBasePointer(MF))
|
||||||
|
return true;
|
||||||
|
break;
|
||||||
case ARM::R7:
|
case ARM::R7:
|
||||||
case ARM::R11:
|
case ARM::R11:
|
||||||
if (FramePtr == Reg && hasFP(MF))
|
if (FramePtr == Reg && hasFP(MF))
|
||||||
|
@ -625,35 +636,49 @@ bool ARMBaseRegisterInfo::hasFP(const MachineFunction &MF) const {
|
||||||
MFI->isFrameAddressTaken());
|
MFI->isFrameAddressTaken());
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
|
bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
|
||||||
const MachineFrameInfo *MFI = MF.getFrameInfo();
|
const MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||||
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
|
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
|
||||||
return (RealignStack &&
|
|
||||||
!AFI->isThumb1OnlyFunction() &&
|
if (!EnableBasePointer)
|
||||||
!MFI->hasVarSizedObjects());
|
return false;
|
||||||
|
|
||||||
|
if (needsStackRealignment(MF) && MFI->hasVarSizedObjects())
|
||||||
|
return true;
|
||||||
|
|
||||||
|
// Thumb has trouble with negative offsets from the FP. Thumb2 has a limited
|
||||||
|
// negative range for ldr/str (255), and thumb1 is positive offsets only.
|
||||||
|
// It's going to be better to use the SP or Base Pointer instead. When there
|
||||||
|
// are variable sized objects, we can't reference off of the SP, so we
|
||||||
|
// reserve a Base Pointer.
|
||||||
|
if (AFI->isThumbFunction() && MFI->hasVarSizedObjects()) {
|
||||||
|
// Conservatively estimate whether the negative offset from the frame
|
||||||
|
// pointer will be sufficient to reach. If a function has a smallish
|
||||||
|
// frame, it's less likely to have lots of spills and callee saved
|
||||||
|
// space, so it's all more likely to be within range of the frame pointer.
|
||||||
|
// If it's wrong, the scavenger will still enable access to work, it just
|
||||||
|
// won't be optimal.
|
||||||
|
if (AFI->isThumb2Function() && MFI->getLocalFrameSize() < 128)
|
||||||
|
return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
|
||||||
|
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
|
||||||
|
return (RealignStack && !AFI->isThumb1OnlyFunction());
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ARMBaseRegisterInfo::
|
bool ARMBaseRegisterInfo::
|
||||||
needsStackRealignment(const MachineFunction &MF) const {
|
needsStackRealignment(const MachineFunction &MF) const {
|
||||||
const MachineFrameInfo *MFI = MF.getFrameInfo();
|
const MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||||
const Function *F = MF.getFunction();
|
const Function *F = MF.getFunction();
|
||||||
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
|
|
||||||
unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
|
unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
|
||||||
bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
|
bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
|
||||||
F->hasFnAttr(Attribute::StackAlignment));
|
F->hasFnAttr(Attribute::StackAlignment));
|
||||||
|
|
||||||
// FIXME: Currently we don't support stack realignment for functions with
|
|
||||||
// variable-sized allocas.
|
|
||||||
// FIXME: It's more complicated than this...
|
|
||||||
if (0 && requiresRealignment && MFI->hasVarSizedObjects())
|
|
||||||
report_fatal_error(
|
|
||||||
"Stack realignment in presense of dynamic allocas is not supported");
|
|
||||||
|
|
||||||
// FIXME: This probably isn't the right place for this.
|
|
||||||
if (0 && requiresRealignment && AFI->isThumb1OnlyFunction())
|
|
||||||
report_fatal_error(
|
|
||||||
"Stack realignment in thumb1 functions is not supported");
|
|
||||||
|
|
||||||
return requiresRealignment && canRealignStack(MF);
|
return requiresRealignment && canRealignStack(MF);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -776,6 +801,10 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
|
||||||
if (AFI->isThumb1OnlyFunction() && AFI->getVarArgsRegSaveSize() > 0)
|
if (AFI->isThumb1OnlyFunction() && AFI->getVarArgsRegSaveSize() > 0)
|
||||||
MF.getRegInfo().setPhysRegUsed(ARM::LR);
|
MF.getRegInfo().setPhysRegUsed(ARM::LR);
|
||||||
|
|
||||||
|
// Spill the BasePtr if it's used.
|
||||||
|
if (hasBasePointer(MF))
|
||||||
|
MF.getRegInfo().setPhysRegUsed(BasePtr);
|
||||||
|
|
||||||
// Don't spill FP if the frame can be eliminated. This is determined
|
// Don't spill FP if the frame can be eliminated. This is determined
|
||||||
// by scanning the callee-save registers to see if any is used.
|
// by scanning the callee-save registers to see if any is used.
|
||||||
const unsigned *CSRegs = getCalleeSavedRegs();
|
const unsigned *CSRegs = getCalleeSavedRegs();
|
||||||
|
@ -1022,13 +1051,14 @@ ARMBaseRegisterInfo::ResolveFrameIndexReference(const MachineFunction &MF,
|
||||||
return Offset - AFI->getDPRCalleeSavedAreaOffset();
|
return Offset - AFI->getDPRCalleeSavedAreaOffset();
|
||||||
|
|
||||||
// When dynamically realigning the stack, use the frame pointer for
|
// When dynamically realigning the stack, use the frame pointer for
|
||||||
// parameters, and the stack pointer for locals.
|
// parameters, and the stack/base pointer for locals.
|
||||||
if (needsStackRealignment(MF)) {
|
if (needsStackRealignment(MF)) {
|
||||||
assert (hasFP(MF) && "dynamic stack realignment without a FP!");
|
assert (hasFP(MF) && "dynamic stack realignment without a FP!");
|
||||||
if (isFixed) {
|
if (isFixed) {
|
||||||
FrameReg = getFrameRegister(MF);
|
FrameReg = getFrameRegister(MF);
|
||||||
Offset = FPOffset;
|
Offset = FPOffset;
|
||||||
}
|
} else if (MFI->hasVarSizedObjects())
|
||||||
|
FrameReg = BasePtr;
|
||||||
return Offset;
|
return Offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1036,9 +1066,13 @@ ARMBaseRegisterInfo::ResolveFrameIndexReference(const MachineFunction &MF,
|
||||||
if (hasFP(MF) && AFI->hasStackFrame()) {
|
if (hasFP(MF) && AFI->hasStackFrame()) {
|
||||||
// Use frame pointer to reference fixed objects. Use it for locals if
|
// Use frame pointer to reference fixed objects. Use it for locals if
|
||||||
// there are VLAs (and thus the SP isn't reliable as a base).
|
// there are VLAs (and thus the SP isn't reliable as a base).
|
||||||
if (isFixed || MFI->hasVarSizedObjects()) {
|
if (isFixed || (MFI->hasVarSizedObjects() && !hasBasePointer(MF))) {
|
||||||
FrameReg = getFrameRegister(MF);
|
FrameReg = getFrameRegister(MF);
|
||||||
Offset = FPOffset;
|
Offset = FPOffset;
|
||||||
|
} else if (MFI->hasVarSizedObjects()) {
|
||||||
|
assert(hasBasePointer(MF) && "missing base pointer!");
|
||||||
|
// Use the base register since we have it.
|
||||||
|
FrameReg = BasePtr;
|
||||||
} else if (AFI->isThumb2Function()) {
|
} else if (AFI->isThumb2Function()) {
|
||||||
// In Thumb2 mode, the negative offset is very limited. Try to avoid
|
// In Thumb2 mode, the negative offset is very limited. Try to avoid
|
||||||
// out of range references.
|
// out of range references.
|
||||||
|
@ -1052,6 +1086,9 @@ ARMBaseRegisterInfo::ResolveFrameIndexReference(const MachineFunction &MF,
|
||||||
Offset = FPOffset;
|
Offset = FPOffset;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Use the base pointer if we have one.
|
||||||
|
if (hasBasePointer(MF))
|
||||||
|
FrameReg = BasePtr;
|
||||||
return Offset;
|
return Offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1089,7 +1126,8 @@ unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg,
|
||||||
case ARM::R5:
|
case ARM::R5:
|
||||||
return ARM::R4;
|
return ARM::R4;
|
||||||
case ARM::R7:
|
case ARM::R7:
|
||||||
return isReservedReg(MF, ARM::R7) ? 0 : ARM::R6;
|
return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6))
|
||||||
|
? 0 : ARM::R6;
|
||||||
case ARM::R9:
|
case ARM::R9:
|
||||||
return isReservedReg(MF, ARM::R9) ? 0 :ARM::R8;
|
return isReservedReg(MF, ARM::R9) ? 0 :ARM::R8;
|
||||||
case ARM::R11:
|
case ARM::R11:
|
||||||
|
@ -1178,7 +1216,8 @@ unsigned ARMBaseRegisterInfo::getRegisterPairOdd(unsigned Reg,
|
||||||
case ARM::R4:
|
case ARM::R4:
|
||||||
return ARM::R5;
|
return ARM::R5;
|
||||||
case ARM::R6:
|
case ARM::R6:
|
||||||
return isReservedReg(MF, ARM::R7) ? 0 : ARM::R7;
|
return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6))
|
||||||
|
? 0 : ARM::R7;
|
||||||
case ARM::R8:
|
case ARM::R8:
|
||||||
return isReservedReg(MF, ARM::R9) ? 0 :ARM::R9;
|
return isReservedReg(MF, ARM::R9) ? 0 :ARM::R9;
|
||||||
case ARM::R10:
|
case ARM::R10:
|
||||||
|
@ -1878,6 +1917,20 @@ emitPrologue(MachineFunction &MF) const {
|
||||||
AFI->setShouldRestoreSPFromFP(true);
|
AFI->setShouldRestoreSPFromFP(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If we need a base pointer, set it up here. It's whatever the value
|
||||||
|
// of the stack pointer is at this point. Any variable size objects
|
||||||
|
// will be allocated after this, so we can still use the base pointer
|
||||||
|
// to reference locals.
|
||||||
|
if (hasBasePointer(MF)) {
|
||||||
|
if (isARM)
|
||||||
|
BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), BasePtr)
|
||||||
|
.addReg(ARM::SP)
|
||||||
|
.addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
|
||||||
|
else
|
||||||
|
BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), BasePtr)
|
||||||
|
.addReg(ARM::SP);
|
||||||
|
}
|
||||||
|
|
||||||
// If the frame has variable sized objects then the epilogue must restore
|
// If the frame has variable sized objects then the epilogue must restore
|
||||||
// the sp from fp.
|
// the sp from fp.
|
||||||
if (!AFI->shouldRestoreSPFromFP() && MFI->hasVarSizedObjects())
|
if (!AFI->shouldRestoreSPFromFP() && MFI->hasVarSizedObjects())
|
||||||
|
|
|
@ -52,6 +52,11 @@ protected:
|
||||||
/// FramePtr - ARM physical register used as frame ptr.
|
/// FramePtr - ARM physical register used as frame ptr.
|
||||||
unsigned FramePtr;
|
unsigned FramePtr;
|
||||||
|
|
||||||
|
/// BasePtr - ARM physical register used as a base ptr in complex stack
|
||||||
|
/// frames. I.e., when we need a 3rd base, not just SP and FP, due to
|
||||||
|
/// variable size stack objects.
|
||||||
|
unsigned BasePtr;
|
||||||
|
|
||||||
// Can be only subclassed.
|
// Can be only subclassed.
|
||||||
explicit ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
|
explicit ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
|
||||||
const ARMSubtarget &STI);
|
const ARMSubtarget &STI);
|
||||||
|
@ -102,6 +107,7 @@ public:
|
||||||
MachineFunction &MF) const;
|
MachineFunction &MF) const;
|
||||||
|
|
||||||
bool hasFP(const MachineFunction &MF) const;
|
bool hasFP(const MachineFunction &MF) const;
|
||||||
|
bool hasBasePointer(const MachineFunction &MF) const;
|
||||||
|
|
||||||
bool canRealignStack(const MachineFunction &MF) const;
|
bool canRealignStack(const MachineFunction &MF) const;
|
||||||
bool needsStackRealignment(const MachineFunction &MF) const;
|
bool needsStackRealignment(const MachineFunction &MF) const;
|
||||||
|
|
|
@ -604,9 +604,12 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
|
||||||
else if (MF.getFrameInfo()->hasVarSizedObjects()) {
|
else if (MF.getFrameInfo()->hasVarSizedObjects()) {
|
||||||
assert(SPAdj == 0 && hasFP(MF) && "Unexpected");
|
assert(SPAdj == 0 && hasFP(MF) && "Unexpected");
|
||||||
// There are alloca()'s in this function, must reference off the frame
|
// There are alloca()'s in this function, must reference off the frame
|
||||||
// pointer instead.
|
// pointer or base pointer instead.
|
||||||
FrameReg = getFrameRegister(MF);
|
if (!hasBasePointer(MF)) {
|
||||||
Offset -= AFI->getFramePtrSpillOffset();
|
FrameReg = getFrameRegister(MF);
|
||||||
|
Offset -= AFI->getFramePtrSpillOffset();
|
||||||
|
} else
|
||||||
|
FrameReg = BasePtr;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Special handling of dbg_value instructions.
|
// Special handling of dbg_value instructions.
|
||||||
|
@ -787,6 +790,13 @@ void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const {
|
||||||
AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
|
AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
|
||||||
AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
|
AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
|
||||||
AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
|
AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
|
||||||
|
|
||||||
|
// If we need a base pointer, set it up here. It's whatever the value
|
||||||
|
// of the stack pointer is at this point. Any variable size objects
|
||||||
|
// will be allocated after this, so we can still use the base pointer
|
||||||
|
// to reference locals.
|
||||||
|
if (hasBasePointer(MF))
|
||||||
|
BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), BasePtr).addReg(ARM::SP);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
|
static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
; RUN: llc < %s -march=thumb | not grep {ldr sp}
|
; RUN: llc < %s -march=thumb | not grep {ldr sp}
|
||||||
; RUN: llc < %s -mtriple=thumb-apple-darwin | \
|
; RUN: llc < %s -mtriple=thumb-apple-darwin | \
|
||||||
; RUN: not grep {sub.*r7}
|
; RUN: not grep {sub.*r7}
|
||||||
; RUN: llc < %s -march=thumb | grep 4294967280
|
; RUN: llc < %s -march=thumb | grep {mov.*r6, sp}
|
||||||
|
|
||||||
%struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* }
|
%struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* }
|
||||||
%struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* }
|
%struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* }
|
||||||
|
|
|
@ -7,19 +7,12 @@
|
||||||
define void @t() nounwind ssp {
|
define void @t() nounwind ssp {
|
||||||
entry:
|
entry:
|
||||||
; CHECK: t:
|
; CHECK: t:
|
||||||
; CHECK: push {r4, r7}
|
|
||||||
; CHECK: mov r0, sp
|
|
||||||
; CHECK: add r7, sp, #4
|
|
||||||
; CHECK: bic r0, r0, #7
|
|
||||||
; CHECK: subs r0, #16
|
|
||||||
; CHECK: mov sp, r0
|
|
||||||
; CHECK: mov r0, sp
|
|
||||||
; CHECK: bic r0, r0, #7
|
|
||||||
; CHECK: subs r0, #16
|
|
||||||
; CHECK: mov sp, r0
|
|
||||||
|
|
||||||
%size = mul i32 8, 2
|
%size = mul i32 8, 2
|
||||||
|
; CHECK: subs r0, #16
|
||||||
|
; CHECK: mov sp, r0
|
||||||
%vla_a = alloca i8, i32 %size, align 8
|
%vla_a = alloca i8, i32 %size, align 8
|
||||||
|
; CHECK: subs r0, #16
|
||||||
|
; CHECK: mov sp, r0
|
||||||
%vla_b = alloca i8, i32 %size, align 8
|
%vla_b = alloca i8, i32 %size, align 8
|
||||||
unreachable
|
unreachable
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue