forked from OSchip/llvm-project
AMDGPU: Don't spill SP reg like a normal CSR
llvm-svn: 313217
This commit is contained in:
parent
cc40ef859a
commit
ecb43ef1bc
|
@ -594,6 +594,15 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
|
|||
}
|
||||
}
|
||||
|
||||
/// Runs the generic TargetFrameLowering::determineCalleeSaves on \p SavedRegs
/// and then clears the bit for this function's stack pointer offset register,
/// so that register is not left marked in \p SavedRegs.
///
/// \param MF        Function being processed; its SIMachineFunctionInfo is
///                  queried for the stack pointer offset register.
/// \param SavedRegs Bit vector updated in place (base implementation first,
///                  then the SP bit is reset here).
/// \param RS        Forwarded unchanged to the base implementation.
void SIFrameLowering::determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
|
||||
RegScavenger *RS) const {
|
||||
TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
|
||||
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
|
||||
// The SP is specifically managed and we don't want extra spills of it.
// This must run after the base call above, which may have set the bit.
|
||||
SavedRegs.reset(MFI->getStackPtrOffsetReg());
|
||||
}
|
||||
|
||||
MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
|
||||
MachineFunction &MF,
|
||||
MachineBasicBlock &MBB,
|
||||
|
|
|
@ -35,6 +35,9 @@ public:
|
|||
int getFrameIndexReference(const MachineFunction &MF, int FI,
|
||||
unsigned &FrameReg) const override;
|
||||
|
||||
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
|
||||
RegScavenger *RS = nullptr) const override;
|
||||
|
||||
void processFunctionBeforeFrameFinalized(
|
||||
MachineFunction &MF,
|
||||
RegScavenger *RS = nullptr) const override;
|
||||
|
|
|
@ -818,6 +818,10 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
|
|||
MachineFrameInfo &FrameInfo = MF->getFrameInfo();
|
||||
DebugLoc DL = MBB.findDebugLoc(MI);
|
||||
|
||||
assert(SrcReg != MFI->getStackPtrOffsetReg() &&
|
||||
SrcReg != MFI->getFrameOffsetReg() &&
|
||||
SrcReg != MFI->getScratchWaveOffsetReg());
|
||||
|
||||
unsigned Size = FrameInfo.getObjectSize(FrameIndex);
|
||||
unsigned Align = FrameInfo.getObjectAlignment(FrameIndex);
|
||||
MachinePointerInfo PtrInfo
|
||||
|
|
|
@ -32,6 +32,7 @@ entry:
|
|||
; GCN: s_mov_b32 s5, s32
|
||||
; GCN-DAG: buffer_store_dword v32
|
||||
; GCN-DAG: buffer_store_dword v33
|
||||
; GCN-NOT: v_writelane_b32 v{{[0-9]+}}, s32
|
||||
; GCN: v_writelane_b32
|
||||
|
||||
; GCN-DAG: s_add_u32 s32, s32, 0xb00{{$}}
|
||||
|
@ -48,6 +49,7 @@ entry:
|
|||
; GCN: buffer_store_dword [[ADD1]], off, s[0:3], s5 offset:20{{$}}
|
||||
|
||||
; GCN: v_readlane_b32
|
||||
; GCN-NOT: v_readlane_b32 s32
|
||||
; GCN: buffer_load_dword v32,
|
||||
; GCN: buffer_load_dword v33,
|
||||
; GCN: s_sub_u32 s32, s32, 0xb00{{$}}
|
||||
|
@ -69,8 +71,8 @@ entry:
|
|||
|
||||
; GCN-LABEL: {{^}}call_void_func_byval_struct_func:
|
||||
; GCN: s_mov_b32 s5, s32
|
||||
; GCN: s_add_u32 s32, s32, 0xc00{{$}}
|
||||
; GCN: v_writelane_b32
|
||||
; GCN-DAG: s_add_u32 s32, s32, 0xc00{{$}}
|
||||
; GCN-DAG: v_writelane_b32
|
||||
|
||||
; GCN-DAG: s_add_u32 s32, s32, 0x800{{$}}
|
||||
; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9
|
||||
|
@ -101,11 +103,12 @@ entry:
|
|||
; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:32
|
||||
|
||||
; GCN: s_swappc_b64
|
||||
; GCN-NEXT: s_sub_u32 s32, s32, 0x800{{$}}
|
||||
|
||||
; GCN-NOT: v_readlane_b32 s32
|
||||
; GCN: v_readlane_b32
|
||||
; GCN-NOT: v_readlane_b32 s32
|
||||
|
||||
; GCN: s_sub_u32 s32, s32, 0xc00{{$}}
|
||||
; GCN: s_sub_u32 s32, s32, 0x800{{$}}
|
||||
; GCN-NEXT: s_sub_u32 s32, s32, 0xc00{{$}}
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @call_void_func_byval_struct_func() #0 {
|
||||
|
|
|
@ -93,8 +93,10 @@ define fastcc i32 @i32_fastcc_i32_byval_i32(i32 %arg0, i32* byval align 4 %arg1)
|
|||
|
||||
; Tail call disallowed with byval in parent.
|
||||
; GCN-LABEL: {{^}}sibling_call_i32_fastcc_i32_byval_i32_byval_parent:
|
||||
; GCN-NOT: v_writelane_b32 v{{[0-9]+}}, s32
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32 offset:4
|
||||
; GCN: s_swappc_b64
|
||||
; GCN-NOT: v_readlane_b32 s32
|
||||
; GCN: s_setpc_b64
|
||||
define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32_byval_parent(i32 %a, i32* byval %b.byval, i32 %c) #1 {
|
||||
entry:
|
||||
|
|
Loading…
Reference in New Issue