forked from OSchip/llvm-project
[AMDGPU] Remove assertion on S1024 SGPR to VGPR spill
Summary: Replace an assertion that blocks S1024 SGPR to VGPR spill. The assertion pre-dates S1024 and is not wave-size dependent.
Reviewers: arsenm, sameerds, rampitec
Reviewed By: arsenm
Subscribers: qcolombet, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, kerbowa, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D80783
This commit is contained in:
parent
0892a96a05
commit
d04147789f
|
@ -287,16 +287,19 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
|
|||
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
|
||||
|
||||
unsigned Size = FrameInfo.getObjectSize(FI);
|
||||
assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
|
||||
assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");
|
||||
unsigned NumLanes = Size / 4;
|
||||
|
||||
int NumLanes = Size / 4;
|
||||
if (NumLanes > WaveSize)
|
||||
return false;
|
||||
|
||||
assert(Size >= 4 && "invalid sgpr spill size");
|
||||
assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");
|
||||
|
||||
const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
|
||||
|
||||
// Make sure to handle the case where a wide SGPR spill may span between two
|
||||
// VGPRs.
|
||||
for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
|
||||
for (unsigned I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
|
||||
Register LaneVGPR;
|
||||
unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);
|
||||
|
||||
|
|
|
@ -193,18 +193,236 @@ ret:
|
|||
ret void
|
||||
}
|
||||
|
||||
; FIXME: x16 inlineasm seems broken
|
||||
; define amdgpu_kernel void @spill_sgpr_x16(i32 addrspace(1)* %out, i32 %in) #0 {
|
||||
; %wide.sgpr = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
|
||||
; %cmp = icmp eq i32 %in, 0
|
||||
; br i1 %cmp, label %bb0, label %ret
|
||||
; ALL-LABEL: {{^}}spill_sgpr_x16:
|
||||
|
||||
; bb0:
|
||||
; call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr) #0
|
||||
; br label %ret
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 3
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 4
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 5
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 6
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 7
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 8
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 9
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 10
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 11
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 12
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 13
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 14
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 15
|
||||
; VGPR: s_cbranch_scc1
|
||||
|
||||
; ret:
|
||||
; ret void
|
||||
; }
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 3
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 4
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 5
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 6
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 7
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 8
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 9
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 10
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 11
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 12
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 13
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 14
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 15
|
||||
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: s_cbranch_scc1
|
||||
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; Test kernel for a 16 x i32 SGPR value. The value is defined by
; side-effecting inline asm with an "=s" output constraint and is consumed
; (as an "s" input) only in %bb0, which is reached when %in equals 0.
; NOTE(review): presumably this keeps the wide value live across the branch so
; that it has to be spilled and reloaded, as the spill_sgpr_x16 check lines
; preceding this function expect -- confirm against the RUN lines, which are
; not visible in this view.
define amdgpu_kernel void @spill_sgpr_x16(i32 addrspace(1)* %out, i32 %in) #0 {
|
||||
%wide.sgpr = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
|
||||
%cmp = icmp eq i32 %in, 0
|
||||
br i1 %cmp, label %bb0, label %ret
|
||||
|
||||
bb0:
|
||||
call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr) #0
|
||||
br label %ret
|
||||
|
||||
ret:
|
||||
ret void
|
||||
}
|
||||
|
||||
; ALL-LABEL: {{^}}spill_sgpr_x32:
|
||||
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 3
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 4
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 5
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 6
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 7
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 8
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 9
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 10
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 11
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 12
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 13
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 14
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 15
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 16
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 17
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 18
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 19
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 20
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 21
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 22
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 23
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 24
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 25
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 26
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 27
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 28
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 29
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 30
|
||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 31
|
||||
; VGPR: s_cbranch_scc1
|
||||
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 3
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 4
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 5
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 6
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 7
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 8
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 9
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 10
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 11
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 12
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 13
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 14
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 15
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 16
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 17
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 18
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 19
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 20
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 21
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 22
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 23
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 24
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 25
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 26
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 27
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 28
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 29
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 30
|
||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 31
|
||||
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: buffer_store_dword
|
||||
; VMEM: s_cbranch_scc1
|
||||
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; VMEM: buffer_load_dword
|
||||
; Test kernel for a 32 x i32 (1024-bit) SGPR value. The value is defined by
; side-effecting inline asm with an "=s" output constraint and is consumed
; (as an "s" input) only in %bb0, which is reached when %in equals 0.
; NOTE(review): presumably this exercises the S1024 SGPR spill path that the
; accompanying change unblocks, as the spill_sgpr_x32 check lines preceding
; this function expect -- confirm against the RUN lines, which are not visible
; in this view.
define amdgpu_kernel void @spill_sgpr_x32(i32 addrspace(1)* %out, i32 %in) #0 {
|
||||
%wide.sgpr = call <32 x i32> asm sideeffect "; def $0", "=s" () #0
|
||||
%cmp = icmp eq i32 %in, 0
|
||||
br i1 %cmp, label %bb0, label %ret
|
||||
|
||||
bb0:
|
||||
call void asm sideeffect "; use $0", "s"(<32 x i32> %wide.sgpr) #0
|
||||
br label %ret
|
||||
|
||||
ret:
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
|
|
Loading…
Reference in New Issue