forked from OSchip/llvm-project
AMDGPU: Setup SP/FP in callee function prolog/epilog
llvm-svn: 306312
This commit is contained in:
parent
2a81089116
commit
f28683cf51
|
@ -417,14 +417,69 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
|
|||
|
||||
void SIFrameLowering::emitPrologue(MachineFunction &MF,
|
||||
MachineBasicBlock &MBB) const {
|
||||
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
if (MFI->isEntryFunction())
|
||||
const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
|
||||
if (FuncInfo->isEntryFunction()) {
|
||||
emitEntryFunctionPrologue(MF, MBB);
|
||||
return;
|
||||
}
|
||||
|
||||
const MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
const SIInstrInfo *TII = ST.getInstrInfo();
|
||||
|
||||
unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
|
||||
unsigned FramePtrReg = FuncInfo->getFrameOffsetReg();
|
||||
|
||||
MachineBasicBlock::iterator MBBI = MBB.begin();
|
||||
DebugLoc DL;
|
||||
|
||||
bool NeedFP = hasFP(MF);
|
||||
if (NeedFP) {
|
||||
// If we need a frame pointer, set it up here. It's whatever the value of
|
||||
// the stack pointer is at this point. Any variable size objects will be
|
||||
// allocated after this, so we can still use the frame pointer to reference
|
||||
// locals.
|
||||
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
|
||||
.addReg(StackPtrReg)
|
||||
.setMIFlag(MachineInstr::FrameSetup);
|
||||
}
|
||||
|
||||
uint32_t NumBytes = MFI.getStackSize();
|
||||
if (NumBytes != 0 && hasSP(MF)) {
|
||||
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg)
|
||||
.addReg(StackPtrReg)
|
||||
.addImm(NumBytes * ST.getWavefrontSize())
|
||||
.setMIFlag(MachineInstr::FrameSetup);
|
||||
}
|
||||
}
|
||||
|
||||
void SIFrameLowering::emitEpilogue(MachineFunction &MF,
|
||||
MachineBasicBlock &MBB) const {
|
||||
const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
|
||||
if (FuncInfo->isEntryFunction())
|
||||
return;
|
||||
|
||||
unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
|
||||
if (StackPtrReg == AMDGPU::NoRegister)
|
||||
return;
|
||||
|
||||
const MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||
uint32_t NumBytes = MFI.getStackSize();
|
||||
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
const SIInstrInfo *TII = ST.getInstrInfo();
|
||||
MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
|
||||
DebugLoc DL;
|
||||
|
||||
// FIXME: Clarify the distinction between SP not being set and hasSP(). For callee functions,
|
||||
// it's really whether we need SP to be accurate or not.
|
||||
|
||||
if (NumBytes != 0 && hasSP(MF)) {
|
||||
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)
|
||||
.addReg(StackPtrReg)
|
||||
.addImm(NumBytes * ST.getWavefrontSize())
|
||||
.setMIFlag(MachineInstr::FrameDestroy);
|
||||
}
|
||||
}
|
||||
|
||||
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
|
||||
|
@ -557,3 +612,19 @@ void SIFrameLowering::emitDebuggerPrologue(MachineFunction &MF,
|
|||
WorkItemIDObjectIdx, &AMDGPU::VGPR_32RegClass, TRI);
|
||||
}
|
||||
}
|
||||
|
||||
bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
|
||||
// All stack operations are relative to the frame offset SGPR.
|
||||
// TODO: We still want to eliminate the frame pointer in some cases.
|
||||
const MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||
|
||||
// XXX - Is this only called after frame is finalized? Should be able to check
|
||||
// frame size.
|
||||
return MFI.hasStackObjects() && !allStackObjectsAreDead(MFI);
|
||||
}
|
||||
|
||||
bool SIFrameLowering::hasSP(const MachineFunction &MF) const {
|
||||
// An SP is required only if the function makes calls or has variable sized objects.
|
||||
const MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||
return MFI.hasCalls() || MFI.hasVarSizedObjects();
|
||||
}
|
||||
|
|
|
@ -60,6 +60,10 @@ private:
|
|||
|
||||
/// \brief Emits debugger prologue.
|
||||
void emitDebuggerPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const;
|
||||
|
||||
public:
|
||||
bool hasFP(const MachineFunction &MF) const override;
|
||||
bool hasSP(const MachineFunction &MF) const;
|
||||
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
|
|
@ -86,6 +86,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
|
|||
ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
|
||||
ScratchWaveOffsetReg = AMDGPU::SGPR4;
|
||||
FrameOffsetReg = AMDGPU::SGPR5;
|
||||
StackPtrOffsetReg = AMDGPU::SGPR32;
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,27 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=CI %s
|
||||
|
||||
; GCN-LABEL: {{^}}callee_no_stack:
|
||||
; GCN: ; BB#0:
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @callee_no_stack() #0 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; Requires frame pointer for access to local regular object.
|
||||
|
||||
; GCN-LABEL: {{^}}callee_with_stack:
|
||||
; GCN: ; BB#0:
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_mov_b32 s5, s32
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0{{$}}
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s5 offset:4{{$}}
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @callee_with_stack() #0 {
|
||||
%alloca = alloca i32
|
||||
store volatile i32 0, i32* %alloca
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
|
@ -1,4 +1,4 @@
|
|||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
|
||||
|
||||
; Test that non-entry function frame indices are expanded properly to
|
||||
; give an index relative to the scratch wave offset register
|
||||
|
@ -71,8 +71,9 @@ define void @func_load_private_arg_i32_ptr(i32* %ptr) #0 {
|
|||
|
||||
; GCN-LABEL: {{^}}void_func_byval_struct_i8_i32_ptr:
|
||||
; GCN: s_waitcnt
|
||||
; GCN-NEXT: s_sub_u32 s6, s5, s4
|
||||
; GCN-NEXT: v_lshr_b32_e64 v0, s6, 6
|
||||
; GCN-NEXT: s_mov_b32 s5, s32
|
||||
; GCN-NEXT: s_sub_u32 [[SUB:s[0-9]+]], s5, s4
|
||||
; GCN-NEXT: v_lshr_b32_e64 v0, [[SUB]], 6
|
||||
; GCN-NEXT: v_add_i32_e32 v0, vcc, 4, v0
|
||||
; GCN-NOT: v_mov
|
||||
; GCN: ds_write_b32 v0, v0
|
||||
|
@ -86,6 +87,7 @@ define void @void_func_byval_struct_i8_i32_ptr({ i8, i32 }* byval %arg0) #0 {
|
|||
|
||||
; GCN-LABEL: {{^}}void_func_byval_struct_i8_i32_ptr_value:
|
||||
; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: s_mov_b32 s5, s32
|
||||
; GCN-NEXT: buffer_load_ubyte v0, off, s[0:3], s5
|
||||
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s5 offset:4
|
||||
define void @void_func_byval_struct_i8_i32_ptr_value({ i8, i32 }* byval %arg0) #0 {
|
||||
|
|
Loading…
Reference in New Issue