AMDGPU: Fix assert on m0_lo16/m0_hi16

Uses of m0_lo16/m0_hi16 get added (redundantly) to the BUNDLE created
when expanding the pseudos for indirect register accesses. The resource
usage analysis in the asm printer only hits the asserting path when
there is a call in the function.
This commit is contained in:
Matt Arsenault 2021-06-14 12:53:36 -04:00
parent ec97866454
commit ad4a18251a
2 changed files with 19 additions and 0 deletions

View File

@ -833,6 +833,8 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
case AMDGPU::EXEC_HI: case AMDGPU::EXEC_HI:
case AMDGPU::SCC: case AMDGPU::SCC:
case AMDGPU::M0: case AMDGPU::M0:
case AMDGPU::M0_LO16:
case AMDGPU::M0_HI16:
case AMDGPU::SRC_SHARED_BASE: case AMDGPU::SRC_SHARED_BASE:
case AMDGPU::SRC_SHARED_LIMIT: case AMDGPU::SRC_SHARED_LIMIT:
case AMDGPU::SRC_PRIVATE_BASE: case AMDGPU::SRC_PRIVATE_BASE:

View File

@ -66,6 +66,23 @@ entry:
ret void ret void
} }
; External function with hidden visibility and no body in this file; it is
; the callee used by @insertelement_with_call to put a call in the kernel.
declare hidden void @foo()
; Regression test for an assert in the asm printer's resource usage analysis.
; For functions with calls, we were not accounting for m0_lo16/m0_hi16
; uses on the BUNDLE created when expanding the insert register pseudo.
; GCN-LABEL: {{^}}insertelement_with_call:
; GCN: s_set_gpr_idx_on s{{[0-9]+}}, gpr_idx(DST)
; GCN-NEXT: v_mov_b32_e32 {{v[0-9]+}}, 8
; GCN-NEXT: s_set_gpr_idx_off
; GCN: s_swappc_b64
define amdgpu_kernel void @insertelement_with_call(<16 x i32> addrspace(1)* %ptr, i32 %idx) #0 {
; Load the 16-element vector that the dynamic insert operates on.
%vec = load <16 x i32>, <16 x i32> addrspace(1)* %ptr
; Insert the constant 8 at a runtime index (%idx); the checks above expect
; this to be emitted as a v_mov_b32 between s_set_gpr_idx_on/off.
%i6 = insertelement <16 x i32> %vec, i32 8, i32 %idx
; The call is what makes this function reach the previously asserting code.
call void @foo()
; Store the updated vector through a null pointer; presumably this is only
; here so the insertelement result has a use and is not dropped.
store <16 x i32> %i6, <16 x i32> addrspace(1)* null
ret void
}
declare i32 @llvm.amdgcn.workitem.id.x() #1 declare i32 @llvm.amdgcn.workitem.id.x() #1
declare void @llvm.amdgcn.s.barrier() #2 declare void @llvm.amdgcn.s.barrier() #2