forked from OSchip/llvm-project
Detect need for autoalignment of the stack earlier to catch spills more
conservatively. Adjust the eliminateFrameIndex() machinery to handle addr mode 6 (vld1/vst1), which is used for spills. Fix tests to expect aligned Q-reg spilling. llvm-svn: 88874
This commit is contained in:
parent
74ae3e5b0e
commit
01c1cae34d
|
@ -109,6 +109,7 @@ FunctionPass *createNEONPreAllocPass();
|
|||
FunctionPass *createNEONMoveFixPass();
|
||||
FunctionPass *createThumb2ITBlockPass();
|
||||
FunctionPass *createThumb2SizeReductionPass();
|
||||
FunctionPass *createARMMaxStackAlignmentCalculatorPass();
|
||||
|
||||
extern Target TheARMTarget, TheThumbTarget;
|
||||
|
||||
|
|
|
@ -1132,6 +1132,7 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
|
|||
break;
|
||||
}
|
||||
case ARMII::AddrMode4:
|
||||
case ARMII::AddrMode6:
|
||||
// Can't fold any offset even if it's zero.
|
||||
return false;
|
||||
case ARMII::AddrMode5: {
|
||||
|
|
|
@ -1170,7 +1170,8 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
|
|||
// as much as possible above, handle the rest, providing a register that is
|
||||
// SP+LargeImm.
|
||||
assert((Offset ||
|
||||
(MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode4) &&
|
||||
(MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode4 ||
|
||||
(MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode6) &&
|
||||
"This code isn't needed if offset already handled!");
|
||||
|
||||
unsigned ScratchReg = 0;
|
||||
|
@ -1179,7 +1180,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
|
|||
? ARMCC::AL : (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
|
||||
unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg();
|
||||
if (Offset == 0)
|
||||
// Must be addrmode4.
|
||||
// Must be addrmode4/6.
|
||||
MI.getOperand(i).ChangeToRegister(FrameReg, false, false, false);
|
||||
else {
|
||||
ScratchReg = MF.getRegInfo().createVirtualRegister(ARM::GPRRegisterClass);
|
||||
|
@ -1462,4 +1463,46 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
|
|||
emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize);
|
||||
}
|
||||
|
||||
namespace {
|
||||
struct MSAC : public MachineFunctionPass {
|
||||
static char ID;
|
||||
MSAC() : MachineFunctionPass(&ID) {}
|
||||
|
||||
virtual bool runOnMachineFunction(MachineFunction &MF) {
|
||||
MachineFrameInfo *FFI = MF.getFrameInfo();
|
||||
MachineRegisterInfo &RI = MF.getRegInfo();
|
||||
|
||||
// Calculate max stack alignment of all already allocated stack objects.
|
||||
unsigned MaxAlign = calculateMaxStackAlignment(FFI);
|
||||
|
||||
// Be over-conservative: scan over all vreg defs and find, whether vector
|
||||
// registers are used. If yes - there is probability, that vector register
|
||||
// will be spilled and thus stack needs to be aligned properly.
|
||||
for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister;
|
||||
RegNum < RI.getLastVirtReg(); ++RegNum)
|
||||
MaxAlign = std::max(MaxAlign, RI.getRegClass(RegNum)->getAlignment());
|
||||
|
||||
if (FFI->getMaxAlignment() == MaxAlign)
|
||||
return false;
|
||||
|
||||
FFI->setMaxAlignment(MaxAlign);
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual const char *getPassName() const {
|
||||
return "ARM Maximal Stack Alignment Calculator";
|
||||
}
|
||||
|
||||
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
|
||||
AU.setPreservesCFG();
|
||||
MachineFunctionPass::getAnalysisUsage(AU);
|
||||
}
|
||||
};
|
||||
|
||||
char MSAC::ID = 0;
|
||||
}
|
||||
|
||||
/// createARMMaxStackAlignmentCalculatorPass - Allocate and return a new
/// instance of the MSAC pass.
FunctionPass *llvm::createARMMaxStackAlignmentCalculatorPass() {
  return new MSAC();
}
|
||||
|
||||
#include "ARMGenRegisterInfo.inc"
|
||||
|
|
|
@ -93,6 +93,10 @@ bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM,
|
|||
if (Subtarget.hasNEON())
|
||||
PM.add(createNEONPreAllocPass());
|
||||
|
||||
// Calculate and set max stack object alignment early, so we can decide
|
||||
// whether we will need stack realignment (and thus FP).
|
||||
PM.add(createARMMaxStackAlignmentCalculatorPass());
|
||||
|
||||
// FIXME: temporarily disabling load / store optimization pass for Thumb1.
|
||||
if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only())
|
||||
PM.add(createARMLoadStoreOptimizationPass(true));
|
||||
|
|
|
@ -11,8 +11,9 @@ declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly
|
|||
|
||||
define arm_apcscc void @aaa(%quuz* %this, i8* %block) {
|
||||
; CHECK: aaa:
|
||||
; CHECK: vstmia sp
|
||||
; CHECK: vldmia sp
|
||||
; CHECK: bic sp, sp, #15
|
||||
; CHECK: vst1.64 {{.*}}sp @128
|
||||
; CHECK: vld1.64 {{.*}}sp @128
|
||||
entry:
|
||||
%0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
|
||||
store float 6.300000e+01, float* undef, align 4
|
||||
|
|
|
@ -11,8 +11,9 @@ declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly
|
|||
|
||||
define arm_apcscc void @aaa(%quuz* %this, i8* %block) {
|
||||
; CHECK: aaa:
|
||||
; CHECK: vstmia sp
|
||||
; CHECK: vldmia sp
|
||||
; CHECK: bic sp, sp, #15
|
||||
; CHECK: vst1.64 {{.*}}sp @128
|
||||
; CHECK: vld1.64 {{.*}}sp @128
|
||||
entry:
|
||||
%0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
|
||||
store float 6.300000e+01, float* undef, align 4
|
||||
|
|
Loading…
Reference in New Issue