[AMDGPU] Remove assertion on S1024 SGPR to VGPR spill

Summary:
Replace an assertion that blocks S1024 SGPR to VGPR spills with a wave-size-aware check.
The assertion pre-dates S1024, and its fixed size limit is not wave size dependent.
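For context: an S1024 spill covers 1024 bits = 128 bytes = 32 dwords, i.e. 32 VGPR lanes, while the old fixed bound of 64 bytes allowed at most 16 lanes; 32 lanes fit in a single VGPR under both wave32 and wave64. Below is a minimal sketch of the wave-size-aware guard, reusing the Size and WaveSize values from allocateSGPRSpillToVGPR in the diff that follows (a sketch only, not a verbatim copy of the patch):

  unsigned NumLanes = Size / 4;  // one 32-bit VGPR lane per spilled SGPR dword
  if (NumLanes > WaveSize)       // spill needs more lanes than the wave provides
    return false;                // cannot use VGPR lanes for this spill
  assert(Size >= 4 && "invalid sgpr spill size");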

Reviewers: arsenm, sameerds, rampitec

Reviewed By: arsenm

Subscribers: qcolombet, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, kerbowa, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D80783
Carl Ritson 2020-05-30 11:15:39 +09:00
parent 0892a96a05
commit d04147789f
2 changed files with 236 additions and 15 deletions


@@ -287,16 +287,19 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

   unsigned Size = FrameInfo.getObjectSize(FI);
-  assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
-  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");
-  int NumLanes = Size / 4;
+  unsigned NumLanes = Size / 4;
+  if (NumLanes > WaveSize)
+    return false;
+
+  assert(Size >= 4 && "invalid sgpr spill size");
+  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

   const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();

   // Make sure to handle the case where a wide SGPR spill may span between two
   // VGPRs.
-  for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
+  for (unsigned I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
     Register LaneVGPR;
     unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);


@@ -193,18 +193,236 @@ ret:
   ret void
 }

-; FIXME: x16 inlineasm seems broken
-; define amdgpu_kernel void @spill_sgpr_x16(i32 addrspace(1)* %out, i32 %in) #0 {
-; %wide.sgpr = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
-; %cmp = icmp eq i32 %in, 0
-; br i1 %cmp, label %bb0, label %ret
+; ALL-LABEL: {{^}}spill_sgpr_x16:

-; bb0:
-; call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr) #0
-; br label %ret
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 3
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 4
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 5
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 6
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 7
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 8
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 9
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 10
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 11
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 12
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 13
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 14
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 15
+; VGPR: s_cbranch_scc1

-; ret:
-; ret void
-; }
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 3
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 4
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 5
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 6
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 7
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 8
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 9
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 10
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 11
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 12
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 13
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 14
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 15
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: s_cbranch_scc1
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+define amdgpu_kernel void @spill_sgpr_x16(i32 addrspace(1)* %out, i32 %in) #0 {
+  %wide.sgpr = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
+  %cmp = icmp eq i32 %in, 0
+  br i1 %cmp, label %bb0, label %ret
+bb0:
+  call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr) #0
+  br label %ret
+ret:
+  ret void
+}
+; ALL-LABEL: {{^}}spill_sgpr_x32:
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 3
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 4
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 5
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 6
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 7
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 8
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 9
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 10
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 11
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 12
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 13
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 14
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 15
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 16
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 17
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 18
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 19
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 20
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 21
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 22
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 23
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 24
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 25
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 26
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 27
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 28
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 29
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 30
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 31
+; VGPR: s_cbranch_scc1
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 3
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 4
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 5
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 6
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 7
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 8
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 9
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 10
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 11
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 12
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 13
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 14
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 15
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 16
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 17
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 18
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 19
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 20
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 21
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 22
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 23
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 24
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 25
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 26
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 27
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 28
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 29
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 30
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 31
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: s_cbranch_scc1
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+define amdgpu_kernel void @spill_sgpr_x32(i32 addrspace(1)* %out, i32 %in) #0 {
+  %wide.sgpr = call <32 x i32> asm sideeffect "; def $0", "=s" () #0
+  %cmp = icmp eq i32 %in, 0
+  br i1 %cmp, label %bb0, label %ret
+bb0:
+  call void asm sideeffect "; use $0", "s"(<32 x i32> %wide.sgpr) #0
+  br label %ret
+ret:
+  ret void
+}

 attributes #0 = { nounwind }