forked from OSchip/llvm-project
AMDGPU: Don't add emergency stack slot if all spills are SGPR->VGPR
This should avoid reporting that any stack needs to be allocated in the case where no stack is truly used. An unused stack slot is still left around in other cases where there are real stack objects but no spilling occurs. llvm-svn: 295891
This commit is contained in:
parent
639d7b68d6
commit
7b6c5d28f5
|
@ -383,6 +383,16 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
|
||||||
|
for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
|
||||||
|
I != E; ++I) {
|
||||||
|
if (!MFI.isDeadObjectIndex(I))
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
void SIFrameLowering::processFunctionBeforeFrameFinalized(
|
void SIFrameLowering::processFunctionBeforeFrameFinalized(
|
||||||
MachineFunction &MF,
|
MachineFunction &MF,
|
||||||
RegScavenger *RS) const {
|
RegScavenger *RS) const {
|
||||||
|
@ -391,35 +401,14 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
|
||||||
if (!MFI.hasStackObjects())
|
if (!MFI.hasStackObjects())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
bool MayNeedScavengingEmergencySlot = MFI.hasStackObjects();
|
|
||||||
if (MayNeedScavengingEmergencySlot) {
|
|
||||||
// We force this to be at offset 0 so no user object ever has 0 as an
|
|
||||||
// address, so we may use 0 as an invalid pointer value. This is because
|
|
||||||
// LLVM assumes 0 is an invalid pointer in address space 0. Because alloca
|
|
||||||
// is required to be address space 0, we are forced to accept this for
|
|
||||||
// now. Ideally we could have the stack in another address space with 0 as a
|
|
||||||
// valid pointer, and -1 as the null value.
|
|
||||||
//
|
|
||||||
// This will also waste additional space when user stack objects require > 4
|
|
||||||
// byte alignment.
|
|
||||||
//
|
|
||||||
// The main cost here is losing the offset for addressing modes. However
|
|
||||||
// this also ensures we shouldn't need a register for the offset when
|
|
||||||
// emergency scavenging.
|
|
||||||
int ScavengeFI = MFI.CreateFixedObject(
|
|
||||||
AMDGPU::SGPR_32RegClass.getSize(), 0, false);
|
|
||||||
RS->addScavengingFrameIndex(ScavengeFI);
|
|
||||||
}
|
|
||||||
|
|
||||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||||
const SIInstrInfo *TII = ST.getInstrInfo();
|
const SIInstrInfo *TII = ST.getInstrInfo();
|
||||||
const SIRegisterInfo &TRI = TII->getRegisterInfo();
|
const SIRegisterInfo &TRI = TII->getRegisterInfo();
|
||||||
if (!TRI.spillSGPRToVGPR())
|
|
||||||
return;
|
|
||||||
|
|
||||||
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
|
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
|
||||||
if (!FuncInfo->hasSpilledSGPRs())
|
bool AllSGPRSpilledToVGPRs = false;
|
||||||
return;
|
|
||||||
|
if (TRI.spillSGPRToVGPR() && FuncInfo->hasSpilledSGPRs()) {
|
||||||
|
AllSGPRSpilledToVGPRs = true;
|
||||||
|
|
||||||
// Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
|
// Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
|
||||||
// are spilled to VGPRs, in which case we can eliminate the stack usage.
|
// are spilled to VGPRs, in which case we can eliminate the stack usage.
|
||||||
|
@ -437,8 +426,12 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
|
||||||
|
|
||||||
if (TII->isSGPRSpill(MI)) {
|
if (TII->isSGPRSpill(MI)) {
|
||||||
int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
|
int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
|
||||||
if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI))
|
if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI)) {
|
||||||
TRI.eliminateSGPRToVGPRSpillFrameIndex(MI, FI, RS);
|
bool Spilled = TRI.eliminateSGPRToVGPRSpillFrameIndex(MI, FI, RS);
|
||||||
|
(void)Spilled;
|
||||||
|
assert(Spilled && "failed to spill SGPR to VGPR when allocated");
|
||||||
|
} else
|
||||||
|
AllSGPRSpilledToVGPRs = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -446,6 +439,32 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
|
||||||
FuncInfo->removeSGPRToVGPRFrameIndices(MFI);
|
FuncInfo->removeSGPRToVGPRFrameIndices(MFI);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// FIXME: The other checks should be redundant with allStackObjectsAreDead,
|
||||||
|
// but currently hasNonSpillStackObjects is set only from source
|
||||||
|
// allocas. Stack temps produced from legalization are not counted currently.
|
||||||
|
if (FuncInfo->hasNonSpillStackObjects() || FuncInfo->hasSpilledVGPRs() ||
|
||||||
|
!AllSGPRSpilledToVGPRs || !allStackObjectsAreDead(MFI)) {
|
||||||
|
assert(RS && "RegScavenger required if spilling");
|
||||||
|
|
||||||
|
// We force this to be at offset 0 so no user object ever has 0 as an
|
||||||
|
// address, so we may use 0 as an invalid pointer value. This is because
|
||||||
|
// LLVM assumes 0 is an invalid pointer in address space 0. Because alloca
|
||||||
|
// is required to be address space 0, we are forced to accept this for
|
||||||
|
// now. Ideally we could have the stack in another address space with 0 as a
|
||||||
|
// valid pointer, and -1 as the null value.
|
||||||
|
//
|
||||||
|
// This will also waste additional space when user stack objects require > 4
|
||||||
|
// byte alignment.
|
||||||
|
//
|
||||||
|
// The main cost here is losing the offset for addressing modes. However
|
||||||
|
// this also ensures we shouldn't need a register for the offset when
|
||||||
|
// emergency scavenging.
|
||||||
|
int ScavengeFI = MFI.CreateFixedObject(
|
||||||
|
AMDGPU::SGPR_32RegClass.getSize(), 0, false);
|
||||||
|
RS->addScavengingFrameIndex(ScavengeFI);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void SIFrameLowering::emitDebuggerPrologue(MachineFunction &MF,
|
void SIFrameLowering::emitDebuggerPrologue(MachineFunction &MF,
|
||||||
MachineBasicBlock &MBB) const {
|
MachineBasicBlock &MBB) const {
|
||||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||||
|
|
|
@ -18,13 +18,12 @@
|
||||||
; GCN: s_mov_b32 m0
|
; GCN: s_mov_b32 m0
|
||||||
|
|
||||||
; Make sure scratch space isn't being used for SGPR->VGPR spills
|
; Make sure scratch space isn't being used for SGPR->VGPR spills
|
||||||
; FIXME: Seem to be leaving behind unused emergency slot.
|
|
||||||
|
|
||||||
; Writing to M0 from an SMRD instruction will hang the GPU.
|
; Writing to M0 from an SMRD instruction will hang the GPU.
|
||||||
; GCN-NOT: s_buffer_load_dword m0
|
; GCN-NOT: s_buffer_load_dword m0
|
||||||
; GCN: s_endpgm
|
; GCN: s_endpgm
|
||||||
|
|
||||||
; TOVGPR: ScratchSize: 4{{$}}
|
; TOVGPR: ScratchSize: 0{{$}}
|
||||||
define amdgpu_ps void @main([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) {
|
define amdgpu_ps void @main([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) {
|
||||||
main_body:
|
main_body:
|
||||||
%tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg, i64 0, i32 0
|
%tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg, i64 0, i32 0
|
||||||
|
@ -768,7 +767,7 @@ ENDIF66: ; preds = %LOOP65
|
||||||
|
|
||||||
; GCN-LABEL: {{^}}main1:
|
; GCN-LABEL: {{^}}main1:
|
||||||
; GCN: s_endpgm
|
; GCN: s_endpgm
|
||||||
; TOVGPR: ScratchSize: 4{{$}}
|
; TOVGPR: ScratchSize: 0{{$}}
|
||||||
define amdgpu_ps void @main1([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) {
|
define amdgpu_ps void @main1([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) {
|
||||||
main_body:
|
main_body:
|
||||||
%tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg, i64 0, i32 0
|
%tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg, i64 0, i32 0
|
||||||
|
|
Loading…
Reference in New Issue