forked from OSchip/llvm-project
[AMDGPU] Remove the dead spill slots while spilling FP/BP to memory
During the PEI pass, the dead TargetStackID::SGPRSpill spill slots are not being removed while spilling the FP/BP to memory.

Fixes: SWDEV-250393

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D87032
This commit is contained in:
parent
62f89a89f3
commit
aff94ec0f4
|
@ -112,15 +112,19 @@ static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF,
|
|||
// 3: There's no free lane to spill, and no free register to save FP/BP,
|
||||
// so we're forced to spill another VGPR to use for the spill.
|
||||
FrameIndex = NewFI;
|
||||
|
||||
LLVM_DEBUG(
|
||||
auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
|
||||
dbgs() << (IsFP ? "FP" : "BP") << " requires fallback spill to "
|
||||
<< printReg(Spill.VGPR, TRI) << ':' << Spill.Lane << '\n';);
|
||||
} else {
|
||||
// Remove dead <NewFI> index
|
||||
MF.getFrameInfo().RemoveStackObject(NewFI);
|
||||
// 4: If all else fails, spill the FP/BP to memory.
|
||||
FrameIndex = FrameInfo.CreateSpillStackObject(4, Align(4));
|
||||
LLVM_DEBUG(dbgs() << "Reserved FI " << FrameIndex << " for spilling "
|
||||
<< (IsFP ? "FP" : "BP") << '\n');
|
||||
}
|
||||
|
||||
LLVM_DEBUG(auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
|
||||
dbgs() << (IsFP ? "FP" : "BP") << " requires fallback spill to "
|
||||
<< printReg(Spill.VGPR, TRI) << ':' << Spill.Lane
|
||||
<< '\n';);
|
||||
} else {
|
||||
LLVM_DEBUG(dbgs() << "Saving " << (IsFP ? "FP" : "BP") << " with copy to "
|
||||
<< printReg(TempSGPR, TRI) << '\n');
|
||||
|
|
|
@ -463,15 +463,16 @@ define void @ipra_call_with_stack() #0 {
|
|||
; GCN-LABEL: {{^}}callee_need_to_spill_fp_to_memory:
|
||||
; GCN: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
|
||||
; GCN: v_mov_b32_e32 [[TMP_VGPR1:v[0-9]+]], s33
|
||||
; GCN: buffer_store_dword [[TMP_VGPR1]], off, s[0:3], s32 offset:[[OFF:[0-9]+]]
|
||||
; GCN: buffer_store_dword [[TMP_VGPR1]], off, s[0:3], s32 offset:4
|
||||
; GCN: s_mov_b64 exec, [[COPY_EXEC1]]
|
||||
; GCN: s_mov_b32 s33, s32
|
||||
; GCN: s_or_saveexec_b64 [[COPY_EXEC2:s\[[0-9]+:[0-9]+\]]], -1{{$}}
|
||||
; GCN: buffer_load_dword [[TMP_VGPR2:v[0-9]+]], off, s[0:3], s32 offset:[[OFF]]
|
||||
; GCN: buffer_load_dword [[TMP_VGPR2:v[0-9]+]], off, s[0:3], s32 offset:4
|
||||
; GCN: s_waitcnt vmcnt(0)
|
||||
; GCN: v_readfirstlane_b32 s33, [[TMP_VGPR2]]
|
||||
; GCN: s_mov_b64 exec, [[COPY_EXEC2]]
|
||||
; GCN: s_setpc_b64
|
||||
; GCN: ScratchSize: 8
|
||||
define void @callee_need_to_spill_fp_to_memory() #3 {
|
||||
call void asm sideeffect "; clobber nonpreserved SGPRs",
|
||||
"~{s4},~{s5},~{s6},~{s7},~{s8},~{s9}
|
||||
|
@ -529,8 +530,8 @@ define void @callee_need_to_spill_fp_to_memory_full_reserved_vgpr() #3 {
|
|||
; GCN-LABEL: {{^}}spill_fp_to_memory_scratch_reg_needed_mubuf_offset
|
||||
; GCN: s_or_saveexec_b64 s[4:5], -1
|
||||
; GCN: v_mov_b32_e32 v0, s33
|
||||
; GCN-NOT: v_mov_b32_e32 v0, 0x100c
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, 0x100c
|
||||
; GCN-NOT: v_mov_b32_e32 v0, 0x1008
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, 0x1008
|
||||
; GCN-NEXT: buffer_store_dword v0, v1, s[0:3], s32 offen
|
||||
define void @spill_fp_to_memory_scratch_reg_needed_mubuf_offset([4096 x i8] addrspace(5)* byval align 4 %arg) #3 {
|
||||
%alloca = alloca i32, addrspace(5)
|
||||
|
|
|
@ -291,12 +291,12 @@ define void @spill_bp_to_memory_scratch_reg_needed_mubuf_offset(<32 x i32> %a, i
|
|||
; GCN-LABEL: spill_bp_to_memory_scratch_reg_needed_mubuf_offset
|
||||
; GCN: s_or_saveexec_b64 s[4:5], -1
|
||||
; GCN: v_mov_b32_e32 v0, s33
|
||||
; GCN-NOT: v_mov_b32_e32 v0, 0x1088
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, 0x1088
|
||||
; GCN-NOT: v_mov_b32_e32 v0, 0x1084
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, 0x1084
|
||||
; GCN-NEXT: buffer_store_dword v0, v1, s[0:3], s32 offen
|
||||
; GCN: v_mov_b32_e32 v0, s34
|
||||
; GCN-NOT: v_mov_b32_e32 v0, 0x1090
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, 0x1090
|
||||
; GCN-NOT: v_mov_b32_e32 v0, 0x1088
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, 0x1088
|
||||
; GCN-NEXT: buffer_store_dword v0, v1, s[0:3], s32 offen
|
||||
%local_val = alloca i32, align 128, addrspace(5)
|
||||
store volatile i32 %b, i32 addrspace(5)* %local_val, align 128
|
||||
|
|
Loading…
Reference in New Issue