forked from OSchip/llvm-project
[AMDGPU] Add support for in-order bvh in waitcnt pass
BVH instructions should be handled separately from VMEM instructions and from VMEM instructions with a sampler in waitcnt handling. Differential Revision: https://reviews.llvm.org/D114794
This commit is contained in:
parent
17a3385c35
commit
0e8590f065
|
@ -150,6 +150,8 @@ enum VmemType {
|
||||||
VMEM_NOSAMPLER,
|
VMEM_NOSAMPLER,
|
||||||
// MIMG instructions with a sampler.
|
// MIMG instructions with a sampler.
|
||||||
VMEM_SAMPLER,
|
VMEM_SAMPLER,
|
||||||
|
// BVH instructions
|
||||||
|
VMEM_BVH
|
||||||
};
|
};
|
||||||
|
|
||||||
VmemType getVmemType(const MachineInstr &Inst) {
|
VmemType getVmemType(const MachineInstr &Inst) {
|
||||||
|
@ -157,9 +159,10 @@ VmemType getVmemType(const MachineInstr &Inst) {
|
||||||
if (!SIInstrInfo::isMIMG(Inst))
|
if (!SIInstrInfo::isMIMG(Inst))
|
||||||
return VMEM_NOSAMPLER;
|
return VMEM_NOSAMPLER;
|
||||||
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Inst.getOpcode());
|
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Inst.getOpcode());
|
||||||
return AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode)->Sampler
|
const AMDGPU::MIMGBaseOpcodeInfo *BaseInfo =
|
||||||
? VMEM_SAMPLER
|
AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
|
||||||
: VMEM_NOSAMPLER;
|
return BaseInfo->BVH ? VMEM_BVH
|
||||||
|
: BaseInfo->Sampler ? VMEM_SAMPLER : VMEM_NOSAMPLER;
|
||||||
}
|
}
|
||||||
|
|
||||||
void addWait(AMDGPU::Waitcnt &Wait, InstCounterType T, unsigned Count) {
|
void addWait(AMDGPU::Waitcnt &Wait, InstCounterType T, unsigned Count) {
|
||||||
|
|
|
@ -23,6 +23,7 @@ body: |
|
||||||
; GCN: S_WAITCNT 0
|
; GCN: S_WAITCNT 0
|
||||||
; GCN-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
|
; GCN-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
|
||||||
; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
|
; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
|
||||||
|
; GCN-NEXT: S_WAITCNT 16240
|
||||||
; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
|
; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
|
||||||
; GCN-NEXT: S_ENDPGM 0
|
; GCN-NEXT: S_ENDPGM 0
|
||||||
$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
|
$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
|
||||||
|
@ -52,6 +53,7 @@ body: |
|
||||||
; GCN: S_WAITCNT 0
|
; GCN: S_WAITCNT 0
|
||||||
; GCN-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
|
; GCN-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
|
||||||
; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
|
; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
|
||||||
|
; GCN-NEXT: S_WAITCNT 16240
|
||||||
; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
|
; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
|
||||||
; GCN-NEXT: S_ENDPGM 0
|
; GCN-NEXT: S_ENDPGM 0
|
||||||
$vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
|
$vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
|
||||||
|
|
Loading…
Reference in New Issue