forked from OSchip/llvm-project
AMDGPU: Fix ignoring DisableFramePointerElim in leaf functions
The attribute can specify elimination for leaf or non-leaf functions, so it should always be considered. I copied this bug from AArch64, which probably should also be fixed. llvm-svn: 363949
This commit is contained in:
parent
68f29dac4b
commit
5dc457cbe4
|
@ -707,12 +707,12 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
|
|||
.addReg(ScratchExecCopy);
|
||||
}
|
||||
|
||||
if (hasFP(MF)) {
|
||||
const MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||
uint32_t NumBytes = MFI.getStackSize();
|
||||
uint32_t RoundedSize = FuncInfo->isStackRealigned() ?
|
||||
NumBytes + MFI.getMaxAlignment() : NumBytes;
|
||||
const MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||
uint32_t NumBytes = MFI.getStackSize();
|
||||
uint32_t RoundedSize = FuncInfo->isStackRealigned() ?
|
||||
NumBytes + MFI.getMaxAlignment() : NumBytes;
|
||||
|
||||
if (RoundedSize != 0 && hasFP(MF)) {
|
||||
const unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
|
||||
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)
|
||||
.addReg(StackPtrReg)
|
||||
|
@ -863,14 +863,10 @@ bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
|
|||
// API SP if there are calls.
|
||||
if (MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction())
|
||||
return true;
|
||||
|
||||
// Retain behavior of always omitting the FP for leaf functions when
|
||||
// possible.
|
||||
if (MF.getTarget().Options.DisableFramePointerElim(MF))
|
||||
return true;
|
||||
}
|
||||
|
||||
return MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
|
||||
MFI.hasStackMap() || MFI.hasPatchPoint() ||
|
||||
MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->needsStackRealignment(MF);
|
||||
MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->needsStackRealignment(MF) ||
|
||||
MF.getTarget().Options.DisableFramePointerElim(MF);
|
||||
}
|
||||
|
|
|
@ -9,15 +9,22 @@ define void @callee_no_stack() #0 {
|
|||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}callee_no_stack_no_fp_elim:
|
||||
; GCN-LABEL: {{^}}callee_no_stack_no_fp_elim_all:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_mov_b32 s5, s32
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @callee_no_stack_no_fp_elim() #1 {
|
||||
define void @callee_no_stack_no_fp_elim_all() #1 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; Requires frame pointer for access to local regular object.
|
||||
; GCN-LABEL: {{^}}callee_no_stack_no_fp_elim_nonleaf:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @callee_no_stack_no_fp_elim_nonleaf() #2 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}callee_with_stack:
|
||||
; GCN: ; %bb.0:
|
||||
|
@ -32,6 +39,35 @@ define void @callee_with_stack() #0 {
|
|||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}callee_with_stack_no_fp_elim_all:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_mov_b32 s5, s32
|
||||
; GCN-NEXT: s_add_u32 s32, s32, 0x200
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0{{$}}
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s5 offset:4{{$}}
|
||||
; GCN-NEXT: s_sub_u32 s32, s32, 0x200
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @callee_with_stack_no_fp_elim_all() #1 {
|
||||
%alloca = alloca i32, addrspace(5)
|
||||
store volatile i32 0, i32 addrspace(5)* %alloca
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}callee_with_stack_no_fp_elim_non_leaf:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0{{$}}
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32{{$}}
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @callee_with_stack_no_fp_elim_non_leaf() #2 {
|
||||
%alloca = alloca i32, addrspace(5)
|
||||
store volatile i32 0, i32 addrspace(5)* %alloca
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}callee_with_stack_and_call:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt
|
||||
|
@ -151,4 +187,5 @@ define void @spill_only_csr_sgpr() {
|
|||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind "no-frame-pointer-elim"="true" }
|
||||
attributes #1 = { nounwind "frame-pointer"="all" }
|
||||
attributes #2 = { nounwind "frame-pointer"="non-leaf" }
|
||||
|
|
Loading…
Reference in New Issue