[RISCV] Reserve an emergency spill slot for the register scavenger when necessary

Although the register scavenger can often find a spare register, an emergency 
spill slot is needed to guarantee success. Reserve this slot in cases where 
the function is known to have a large stack (meaning the scavenger may be 
needed when forming stack addresses).

llvm-svn: 322269
This commit is contained in:
Alex Bradbury 2018-01-11 11:17:19 +00:00
parent da45439fba
commit 0715d35ed5
3 changed files with 114 additions and 7 deletions

View File

@ -18,6 +18,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
using namespace llvm;
@ -224,3 +225,21 @@ void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF,
SavedRegs.set(RISCV::X1);
SavedRegs.set(RISCV::X8);
}
// Reserves an emergency spill slot for the register scavenger when the
// function's estimated stack size is too large to be treated as "small".
void RISCVFrameLowering::processFunctionBeforeFrameFinalized(
    MachineFunction &MF, RegScavenger *RS) const {
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();

  // A stack is only considered small if its estimated size is representable
  // in an 11-bit signed field. 12 bits is deliberately not used as the
  // threshold: estimateStackSize has been observed to under-estimate the
  // final stack size, so one bit is left as wiggle-room.
  // FIXME: It may be possible to craft a function with a small stack that
  // still needs an emergency spill slot for branch relaxation. That case is
  // currently missed by this early exit.
  if (isInt<11>(FrameInfo.estimateStackSize(MF)))
    return;

  // Create a stack object sized and aligned for one GPR spill and register
  // it with the scavenger as its scavenging frame index.
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  const TargetRegisterClass &GPR = RISCV::GPRRegClass;
  const int ScavengeSlotFI = FrameInfo.CreateStackObject(
      TRI->getSpillSize(GPR), TRI->getSpillAlignment(GPR), false);
  RS->addScavengingFrameIndex(ScavengeSlotFI);
}

View File

@ -36,6 +36,9 @@ public:
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
RegScavenger *RS) const override;
/// Reserves an emergency spill slot for the register scavenger \p RS when
/// the estimated stack size of \p MF is not representable in an 11-bit
/// signed field; otherwise does nothing.
void processFunctionBeforeFrameFinalized(MachineFunction &MF,
RegScavenger *RS) const override;
bool hasFP(const MachineFunction &MF) const override;
MachineBasicBlock::iterator

View File

@ -8,31 +8,116 @@ define void @test() nounwind {
; RV32I-LABEL: test:
; RV32I: # %bb.0:
; RV32I-NEXT: lui a0, 74565
; RV32I-NEXT: addi a0, a0, 1664
; RV32I-NEXT: addi a0, a0, 1680
; RV32I-NEXT: sub sp, sp, a0
; RV32I-NEXT: lui a0, 74565
; RV32I-NEXT: addi a0, a0, 1660
; RV32I-NEXT: addi a0, a0, 1676
; RV32I-NEXT: add a0, sp, a0
; RV32I-NEXT: sw ra, 0(a0)
; RV32I-NEXT: lui a0, 74565
; RV32I-NEXT: addi a0, a0, 1656
; RV32I-NEXT: addi a0, a0, 1672
; RV32I-NEXT: add a0, sp, a0
; RV32I-NEXT: sw s0, 0(a0)
; RV32I-NEXT: lui a0, 74565
; RV32I-NEXT: addi a0, a0, 1664
; RV32I-NEXT: addi a0, a0, 1680
; RV32I-NEXT: add s0, sp, a0
; RV32I-NEXT: lui a0, 74565
; RV32I-NEXT: addi a0, a0, 1656
; RV32I-NEXT: addi a0, a0, 1672
; RV32I-NEXT: add a0, sp, a0
; RV32I-NEXT: lw s0, 0(a0)
; RV32I-NEXT: lui a0, 74565
; RV32I-NEXT: addi a0, a0, 1660
; RV32I-NEXT: addi a0, a0, 1676
; RV32I-NEXT: add a0, sp, a0
; RV32I-NEXT: lw ra, 0(a0)
; RV32I-NEXT: lui a0, 74565
; RV32I-NEXT: addi a0, a0, 1664
; RV32I-NEXT: addi a0, a0, 1680
; RV32I-NEXT: add sp, sp, a0
; RV32I-NEXT: ret
%tmp = alloca [ 305419896 x i8 ] , align 4
ret void
}
; This test case artificially produces register pressure which should force
; use of the emergency spill slot. The 100000 x i32 alloca makes the stack
; frame large, and the fifteen inline-asm results are kept live across the
; volatile store into that array.
define void @test_emergency_spill_slot(i32 %a) nounwind {
; RV32I-LABEL: test_emergency_spill_slot:
; RV32I: # %bb.0:
; RV32I-NEXT: lui a1, 98
; RV32I-NEXT: addi a1, a1, -1376
; RV32I-NEXT: sub sp, sp, a1
; RV32I-NEXT: lui a1, 98
; RV32I-NEXT: addi a1, a1, -1380
; RV32I-NEXT: add a1, sp, a1
; RV32I-NEXT: sw ra, 0(a1)
; RV32I-NEXT: lui a1, 98
; RV32I-NEXT: addi a1, a1, -1384
; RV32I-NEXT: add a1, sp, a1
; RV32I-NEXT: sw s0, 0(a1)
; RV32I-NEXT: lui a1, 98
; RV32I-NEXT: addi a1, a1, -1388
; RV32I-NEXT: add a1, sp, a1
; RV32I-NEXT: sw s1, 0(a1)
; RV32I-NEXT: lui a1, 98
; RV32I-NEXT: addi a1, a1, -1392
; RV32I-NEXT: add a1, sp, a1
; RV32I-NEXT: sw s2, 0(a1)
; RV32I-NEXT: lui a1, 98
; RV32I-NEXT: addi a1, a1, -1376
; RV32I-NEXT: add s0, sp, a1
; RV32I-NEXT: lui a1, 78
; RV32I-NEXT: addi a1, a1, 512
; RV32I-NEXT: lui a2, 1048478
; RV32I-NEXT: addi a2, a2, 1388
; RV32I-NEXT: add a2, s0, a2
; RV32I-NEXT: mv a2, a2
; RV32I-NEXT: add a1, a2, a1
; RV32I-NEXT: #APP
; RV32I-NEXT: nop
; RV32I-NEXT: #NO_APP
; RV32I-NEXT: sw a0, 0(a1)
; RV32I-NEXT: #APP
; RV32I-NEXT: nop
; RV32I-NEXT: #NO_APP
; RV32I-NEXT: lui a0, 98
; RV32I-NEXT: addi a0, a0, -1392
; RV32I-NEXT: add a0, sp, a0
; RV32I-NEXT: lw s2, 0(a0)
; RV32I-NEXT: lui a0, 98
; RV32I-NEXT: addi a0, a0, -1388
; RV32I-NEXT: add a0, sp, a0
; RV32I-NEXT: lw s1, 0(a0)
; RV32I-NEXT: lui a0, 98
; RV32I-NEXT: addi a0, a0, -1384
; RV32I-NEXT: add a0, sp, a0
; RV32I-NEXT: lw s0, 0(a0)
; RV32I-NEXT: lui a0, 98
; RV32I-NEXT: addi a0, a0, -1380
; RV32I-NEXT: add a0, sp, a0
; RV32I-NEXT: lw ra, 0(a0)
; RV32I-NEXT: lui a0, 98
; RV32I-NEXT: addi a0, a0, -1376
; RV32I-NEXT: add sp, sp, a0
; RV32I-NEXT: ret
; Large stack object: 100000 i32 elements.
%data = alloca [ 100000 x i32 ] , align 4
; Address of element 80000 inside the large stack object.
%ptr = getelementptr inbounds [100000 x i32], [100000 x i32]* %data, i32 0, i32 80000
; Inline asm that defines fifteen i32 values, each constrained to a register.
%1 = tail call { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } asm sideeffect "nop", "=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r"()
%asmresult0 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 0
%asmresult1 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 1
%asmresult2 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 2
%asmresult3 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 3
%asmresult4 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 4
%asmresult5 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 5
%asmresult6 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 6
%asmresult7 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 7
%asmresult8 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 8
%asmresult9 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 9
%asmresult10 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 10
%asmresult11 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 11
%asmresult12 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 12
%asmresult13 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 13
%asmresult14 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 14
; Volatile store of %a into the large array, placed between the asm that
; defines the fifteen values and the asm that uses them.
store volatile i32 %a, i32* %ptr
; Inline asm that takes all fifteen values as register inputs.
tail call void asm sideeffect "nop", "r,r,r,r,r,r,r,r,r,r,r,r,r,r,r"(i32 %asmresult0, i32 %asmresult1, i32 %asmresult2, i32 %asmresult3, i32 %asmresult4, i32 %asmresult5, i32 %asmresult6, i32 %asmresult7, i32 %asmresult8, i32 %asmresult9, i32 %asmresult10, i32 %asmresult11, i32 %asmresult12, i32 %asmresult13, i32 %asmresult14)
ret void
}