forked from OSchip/llvm-project
[AMDGPU] Don't check for VMEM hazards on GFX10
The hazard where a VMEM instruction reads an SGPR written by a VALU instruction counts as a data dependency hazard, so no nops are required on GFX10.

Tested with Vulkan CTS on GFX10.1 and GFX10.3.

Differential Revision: https://reviews.llvm.org/D97926
This commit is contained in:
parent
ba18a51c38
commit
ed7458398a
|
@ -157,12 +157,6 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
|
|||
if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0)
|
||||
return HazardType;
|
||||
|
||||
// FIXME: Should flat be considered vmem?
|
||||
if ((SIInstrInfo::isVMEM(*MI) ||
|
||||
SIInstrInfo::isFLAT(*MI))
|
||||
&& checkVMEMHazards(MI) > 0)
|
||||
return HazardType;
|
||||
|
||||
if (ST.hasNSAtoVMEMBug() && checkNSAtoVMEMHazard(MI) > 0)
|
||||
return HazardType;
|
||||
|
||||
|
@ -172,6 +166,12 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
|
|||
if (ST.hasNoDataDepHazard())
|
||||
return NoHazard;
|
||||
|
||||
// FIXME: Should flat be considered vmem?
|
||||
if ((SIInstrInfo::isVMEM(*MI) ||
|
||||
SIInstrInfo::isFLAT(*MI))
|
||||
&& checkVMEMHazards(MI) > 0)
|
||||
return HazardType;
|
||||
|
||||
if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
|
||||
return HazardType;
|
||||
|
||||
|
@ -275,9 +275,6 @@ unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
|
|||
if (SIInstrInfo::isSMRD(*MI))
|
||||
return std::max(WaitStates, checkSMRDHazards(MI));
|
||||
|
||||
if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI))
|
||||
WaitStates = std::max(WaitStates, checkVMEMHazards(MI));
|
||||
|
||||
if (ST.hasNSAtoVMEMBug())
|
||||
WaitStates = std::max(WaitStates, checkNSAtoVMEMHazard(MI));
|
||||
|
||||
|
@ -286,6 +283,9 @@ unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
|
|||
if (ST.hasNoDataDepHazard())
|
||||
return WaitStates;
|
||||
|
||||
if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI))
|
||||
WaitStates = std::max(WaitStates, checkVMEMHazards(MI));
|
||||
|
||||
if (SIInstrInfo::isVALU(*MI))
|
||||
WaitStates = std::max(WaitStates, checkVALUHazards(MI));
|
||||
|
||||
|
|
|
@ -85,9 +85,8 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_vgpr_descr(i32 %node_ptr,
|
|||
; GCN-NEXT: v_readfirstlane_b32 s6, v16
|
||||
; GCN-NEXT: v_readfirstlane_b32 s7, v17
|
||||
; GCN-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[14:15]
|
||||
; GCN-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[16:17]
|
||||
; GCN-NEXT: s_nop 2
|
||||
; GCN-NEXT: image_bvh_intersect_ray v[18:21], [v0, v1, v2, v3, v4, v6, v7, v8, v10, v11, v12], s[4:7]
|
||||
; GCN-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[16:17]
|
||||
; GCN-NEXT: s_and_b32 s0, s0, vcc_lo
|
||||
; GCN-NEXT: s_and_saveexec_b32 s0, s0
|
||||
; GCN-NEXT: s_xor_b32 exec_lo, exec_lo, s0
|
||||
|
@ -125,9 +124,8 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_p
|
|||
; GCN-NEXT: v_readfirstlane_b32 s6, v12
|
||||
; GCN-NEXT: v_readfirstlane_b32 s7, v13
|
||||
; GCN-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[10:11]
|
||||
; GCN-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[12:13]
|
||||
; GCN-NEXT: s_nop 2
|
||||
; GCN-NEXT: image_bvh_intersect_ray v[5:8], [v0, v1, v2, v3, v4, v9, v14, v15], s[4:7] a16
|
||||
; GCN-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[12:13]
|
||||
; GCN-NEXT: s_and_b32 s0, s0, vcc_lo
|
||||
; GCN-NEXT: s_and_saveexec_b32 s0, s0
|
||||
; GCN-NEXT: s_xor_b32 exec_lo, exec_lo, s0
|
||||
|
@ -155,9 +153,8 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_vgpr_descr(i64 %node_ptr
|
|||
; GCN-NEXT: v_readfirstlane_b32 s6, v17
|
||||
; GCN-NEXT: v_readfirstlane_b32 s7, v18
|
||||
; GCN-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[15:16]
|
||||
; GCN-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[17:18]
|
||||
; GCN-NEXT: s_nop 2
|
||||
; GCN-NEXT: image_bvh64_intersect_ray v[19:22], [v0, v1, v2, v3, v4, v5, v7, v8, v9, v11, v12, v13], s[4:7]
|
||||
; GCN-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[17:18]
|
||||
; GCN-NEXT: s_and_b32 s0, s0, vcc_lo
|
||||
; GCN-NEXT: s_and_saveexec_b32 s0, s0
|
||||
; GCN-NEXT: s_xor_b32 exec_lo, exec_lo, s0
|
||||
|
@ -195,9 +192,8 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node
|
|||
; GCN-NEXT: v_readfirstlane_b32 s6, v13
|
||||
; GCN-NEXT: v_readfirstlane_b32 s7, v14
|
||||
; GCN-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[11:12]
|
||||
; GCN-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[13:14]
|
||||
; GCN-NEXT: s_nop 2
|
||||
; GCN-NEXT: image_bvh64_intersect_ray v[6:9], [v0, v1, v2, v3, v4, v5, v10, v15, v16], s[4:7] a16
|
||||
; GCN-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[13:14]
|
||||
; GCN-NEXT: s_and_b32 s0, s0, vcc_lo
|
||||
; GCN-NEXT: s_and_saveexec_b32 s0, s0
|
||||
; GCN-NEXT: s_xor_b32 exec_lo, exec_lo, s0
|
||||
|
|
|
@ -131,7 +131,6 @@ define amdgpu_kernel void @add_i32_constant(i32 addrspace(1)* %out) {
|
|||
; GFX1064-NEXT: v_mad_u32_u24 v0, v0, 5, s2
|
||||
; GFX1064-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_nop 0
|
||||
; GFX1064-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX1064-NEXT: s_endpgm
|
||||
;
|
||||
|
@ -162,7 +161,6 @@ define amdgpu_kernel void @add_i32_constant(i32 addrspace(1)* %out) {
|
|||
; GFX1032-NEXT: v_mad_u32_u24 v0, v0, 5, s2
|
||||
; GFX1032-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_nop 0
|
||||
; GFX1032-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX1032-NEXT: s_endpgm
|
||||
entry:
|
||||
|
@ -518,7 +516,6 @@ define amdgpu_kernel void @add_i32_varying(i32 addrspace(1)* %out) {
|
|||
; GFX1064-NEXT: v_add_nc_u32_e32 v0, s3, v0
|
||||
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_nop 0
|
||||
; GFX1064-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX1064-NEXT: s_endpgm
|
||||
;
|
||||
|
@ -569,7 +566,6 @@ define amdgpu_kernel void @add_i32_varying(i32 addrspace(1)* %out) {
|
|||
; GFX1032-NEXT: v_add_nc_u32_e32 v0, s3, v0
|
||||
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_nop 0
|
||||
; GFX1032-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX1032-NEXT: s_endpgm
|
||||
entry:
|
||||
|
@ -754,7 +750,6 @@ define amdgpu_kernel void @add_i32_varying_gfx1032(i32 addrspace(1)* %out) {
|
|||
; GFX1064-NEXT: v_add_nc_u32_e32 v0, s3, v0
|
||||
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_nop 0
|
||||
; GFX1064-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX1064-NEXT: s_endpgm
|
||||
;
|
||||
|
@ -805,7 +800,6 @@ define amdgpu_kernel void @add_i32_varying_gfx1032(i32 addrspace(1)* %out) {
|
|||
; GFX1032-NEXT: v_add_nc_u32_e32 v0, s3, v0
|
||||
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_nop 0
|
||||
; GFX1032-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX1032-NEXT: s_endpgm
|
||||
entry:
|
||||
|
@ -990,7 +984,6 @@ define amdgpu_kernel void @add_i32_varying_gfx1064(i32 addrspace(1)* %out) {
|
|||
; GFX1064-NEXT: v_add_nc_u32_e32 v0, s3, v0
|
||||
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_nop 0
|
||||
; GFX1064-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX1064-NEXT: s_endpgm
|
||||
;
|
||||
|
@ -1041,7 +1034,6 @@ define amdgpu_kernel void @add_i32_varying_gfx1064(i32 addrspace(1)* %out) {
|
|||
; GFX1032-NEXT: v_add_nc_u32_e32 v0, s3, v0
|
||||
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_nop 0
|
||||
; GFX1032-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX1032-NEXT: s_endpgm
|
||||
entry:
|
||||
|
@ -1186,7 +1178,6 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out) {
|
|||
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
|
||||
; GFX1064-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_nop 1
|
||||
; GFX1064-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
|
||||
; GFX1064-NEXT: s_endpgm
|
||||
;
|
||||
|
@ -1219,7 +1210,6 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out) {
|
|||
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
|
||||
; GFX1032-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_nop 1
|
||||
; GFX1032-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
|
||||
; GFX1032-NEXT: s_endpgm
|
||||
entry:
|
||||
|
@ -2040,7 +2030,6 @@ define amdgpu_kernel void @sub_i32_varying(i32 addrspace(1)* %out) {
|
|||
; GFX1064-NEXT: v_sub_nc_u32_e32 v0, s3, v0
|
||||
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_nop 0
|
||||
; GFX1064-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX1064-NEXT: s_endpgm
|
||||
;
|
||||
|
@ -2091,7 +2080,6 @@ define amdgpu_kernel void @sub_i32_varying(i32 addrspace(1)* %out) {
|
|||
; GFX1032-NEXT: v_sub_nc_u32_e32 v0, s3, v0
|
||||
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_nop 0
|
||||
; GFX1032-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX1032-NEXT: s_endpgm
|
||||
entry:
|
||||
|
@ -2762,7 +2750,6 @@ define amdgpu_kernel void @and_i32_varying(i32 addrspace(1)* %out) {
|
|||
; GFX1064-NEXT: v_and_b32_e32 v0, s3, v0
|
||||
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_nop 0
|
||||
; GFX1064-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX1064-NEXT: s_endpgm
|
||||
;
|
||||
|
@ -2813,7 +2800,6 @@ define amdgpu_kernel void @and_i32_varying(i32 addrspace(1)* %out) {
|
|||
; GFX1032-NEXT: v_and_b32_e32 v0, s3, v0
|
||||
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_nop 0
|
||||
; GFX1032-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX1032-NEXT: s_endpgm
|
||||
entry:
|
||||
|
@ -2998,7 +2984,6 @@ define amdgpu_kernel void @or_i32_varying(i32 addrspace(1)* %out) {
|
|||
; GFX1064-NEXT: v_or_b32_e32 v0, s3, v0
|
||||
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_nop 0
|
||||
; GFX1064-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX1064-NEXT: s_endpgm
|
||||
;
|
||||
|
@ -3049,7 +3034,6 @@ define amdgpu_kernel void @or_i32_varying(i32 addrspace(1)* %out) {
|
|||
; GFX1032-NEXT: v_or_b32_e32 v0, s3, v0
|
||||
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_nop 0
|
||||
; GFX1032-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX1032-NEXT: s_endpgm
|
||||
entry:
|
||||
|
@ -3234,7 +3218,6 @@ define amdgpu_kernel void @xor_i32_varying(i32 addrspace(1)* %out) {
|
|||
; GFX1064-NEXT: v_xor_b32_e32 v0, s3, v0
|
||||
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_nop 0
|
||||
; GFX1064-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX1064-NEXT: s_endpgm
|
||||
;
|
||||
|
@ -3285,7 +3268,6 @@ define amdgpu_kernel void @xor_i32_varying(i32 addrspace(1)* %out) {
|
|||
; GFX1032-NEXT: v_xor_b32_e32 v0, s3, v0
|
||||
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_nop 0
|
||||
; GFX1032-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX1032-NEXT: s_endpgm
|
||||
entry:
|
||||
|
@ -3472,7 +3454,6 @@ define amdgpu_kernel void @max_i32_varying(i32 addrspace(1)* %out) {
|
|||
; GFX1064-NEXT: v_max_i32_e32 v0, s3, v0
|
||||
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_nop 0
|
||||
; GFX1064-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX1064-NEXT: s_endpgm
|
||||
;
|
||||
|
@ -3525,7 +3506,6 @@ define amdgpu_kernel void @max_i32_varying(i32 addrspace(1)* %out) {
|
|||
; GFX1032-NEXT: v_max_i32_e32 v0, s3, v0
|
||||
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_nop 0
|
||||
; GFX1032-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX1032-NEXT: s_endpgm
|
||||
entry:
|
||||
|
@ -3891,7 +3871,6 @@ define amdgpu_kernel void @min_i32_varying(i32 addrspace(1)* %out) {
|
|||
; GFX1064-NEXT: v_min_i32_e32 v0, s3, v0
|
||||
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_nop 0
|
||||
; GFX1064-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX1064-NEXT: s_endpgm
|
||||
;
|
||||
|
@ -3944,7 +3923,6 @@ define amdgpu_kernel void @min_i32_varying(i32 addrspace(1)* %out) {
|
|||
; GFX1032-NEXT: v_min_i32_e32 v0, s3, v0
|
||||
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_nop 0
|
||||
; GFX1032-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX1032-NEXT: s_endpgm
|
||||
entry:
|
||||
|
@ -4308,7 +4286,6 @@ define amdgpu_kernel void @umax_i32_varying(i32 addrspace(1)* %out) {
|
|||
; GFX1064-NEXT: v_max_u32_e32 v0, s3, v0
|
||||
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_nop 0
|
||||
; GFX1064-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX1064-NEXT: s_endpgm
|
||||
;
|
||||
|
@ -4359,7 +4336,6 @@ define amdgpu_kernel void @umax_i32_varying(i32 addrspace(1)* %out) {
|
|||
; GFX1032-NEXT: v_max_u32_e32 v0, s3, v0
|
||||
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_nop 0
|
||||
; GFX1032-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX1032-NEXT: s_endpgm
|
||||
entry:
|
||||
|
@ -4720,7 +4696,6 @@ define amdgpu_kernel void @umin_i32_varying(i32 addrspace(1)* %out) {
|
|||
; GFX1064-NEXT: v_min_u32_e32 v0, s3, v0
|
||||
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_nop 0
|
||||
; GFX1064-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX1064-NEXT: s_endpgm
|
||||
;
|
||||
|
@ -4771,7 +4746,6 @@ define amdgpu_kernel void @umin_i32_varying(i32 addrspace(1)* %out) {
|
|||
; GFX1032-NEXT: v_min_u32_e32 v0, s3, v0
|
||||
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_nop 0
|
||||
; GFX1032-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX1032-NEXT: s_endpgm
|
||||
entry:
|
||||
|
|
|
@ -198,7 +198,6 @@ define amdgpu_ps float @global_xchg_saddr_uniform_ptr_in_vgprs_rtn(i32 %voffset,
|
|||
; GFX10-NEXT: v_readfirstlane_b32 s1, v3
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: s_nop 2
|
||||
; GFX10-NEXT: global_atomic_swap v0, v0, v1, s[0:1] glc
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: buffer_gl0_inv
|
||||
|
@ -238,7 +237,6 @@ define amdgpu_ps float @global_xchg_saddr_uniform_ptr_in_vgprs_rtn_immoffset(i32
|
|||
; GFX10-NEXT: v_readfirstlane_b32 s1, v3
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: s_nop 2
|
||||
; GFX10-NEXT: global_atomic_swap v0, v0, v1, s[0:1] offset:42 glc
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: buffer_gl0_inv
|
||||
|
@ -279,7 +277,6 @@ define amdgpu_ps void @global_xchg_saddr_uniform_ptr_in_vgprs_nortn(i32 %voffset
|
|||
; GFX10-NEXT: v_readfirstlane_b32 s1, v3
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: s_nop 2
|
||||
; GFX10-NEXT: global_atomic_swap v0, v1, s[0:1]
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: buffer_gl0_inv
|
||||
|
@ -318,7 +315,6 @@ define amdgpu_ps void @global_xchg_saddr_uniform_ptr_in_vgprs_nortn_immoffset(i3
|
|||
; GFX10-NEXT: v_readfirstlane_b32 s1, v3
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: s_nop 2
|
||||
; GFX10-NEXT: global_atomic_swap v0, v1, s[0:1] offset:42
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: buffer_gl0_inv
|
||||
|
|
|
@ -788,17 +788,28 @@ define amdgpu_ps float @global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_
|
|||
|
||||
; Base pointer is uniform, but also in VGPRs
|
||||
define amdgpu_ps float @global_load_saddr_uniform_ptr_in_vgprs(i32 %voffset) {
|
||||
; GCN-LABEL: global_load_saddr_uniform_ptr_in_vgprs:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GCN-NEXT: ds_read_b64 v[1:2], v1
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: v_readfirstlane_b32 s0, v1
|
||||
; GCN-NEXT: v_readfirstlane_b32 s1, v2
|
||||
; GCN-NEXT: s_nop 4
|
||||
; GCN-NEXT: global_load_ubyte v0, v0, s[0:1]
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: ; return to shader part epilog
|
||||
; GFX9-LABEL: global_load_saddr_uniform_ptr_in_vgprs:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX9-NEXT: ds_read_b64 v[1:2], v1
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: v_readfirstlane_b32 s0, v1
|
||||
; GFX9-NEXT: v_readfirstlane_b32 s1, v2
|
||||
; GFX9-NEXT: s_nop 4
|
||||
; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1]
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX10-LABEL: global_load_saddr_uniform_ptr_in_vgprs:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX10-NEXT: ds_read_b64 v[1:2], v1
|
||||
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-NEXT: v_readfirstlane_b32 s0, v1
|
||||
; GFX10-NEXT: v_readfirstlane_b32 s1, v2
|
||||
; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1]
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%sbase = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(3)* @ptr.in.lds
|
||||
%zext.offset = zext i32 %voffset to i64
|
||||
%gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
|
||||
|
@ -810,17 +821,28 @@ define amdgpu_ps float @global_load_saddr_uniform_ptr_in_vgprs(i32 %voffset) {
|
|||
|
||||
; Base pointer is uniform, but also in VGPRs, with imm offset
|
||||
define amdgpu_ps float @global_load_saddr_uniform_ptr_in_vgprs_immoffset(i32 %voffset) {
|
||||
; GCN-LABEL: global_load_saddr_uniform_ptr_in_vgprs_immoffset:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GCN-NEXT: ds_read_b64 v[1:2], v1
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: v_readfirstlane_b32 s0, v1
|
||||
; GCN-NEXT: v_readfirstlane_b32 s1, v2
|
||||
; GCN-NEXT: s_nop 4
|
||||
; GCN-NEXT: global_load_ubyte v0, v0, s[0:1] offset:42
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: ; return to shader part epilog
|
||||
; GFX9-LABEL: global_load_saddr_uniform_ptr_in_vgprs_immoffset:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX9-NEXT: ds_read_b64 v[1:2], v1
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: v_readfirstlane_b32 s0, v1
|
||||
; GFX9-NEXT: v_readfirstlane_b32 s1, v2
|
||||
; GFX9-NEXT: s_nop 4
|
||||
; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] offset:42
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX10-LABEL: global_load_saddr_uniform_ptr_in_vgprs_immoffset:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX10-NEXT: ds_read_b64 v[1:2], v1
|
||||
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-NEXT: v_readfirstlane_b32 s0, v1
|
||||
; GFX10-NEXT: v_readfirstlane_b32 s1, v2
|
||||
; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] offset:42
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%sbase = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(3)* @ptr.in.lds
|
||||
%zext.offset = zext i32 %voffset to i64
|
||||
%gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
|
||||
|
|
|
@ -58,16 +58,26 @@ define amdgpu_ps void @global_store_saddr_i8_zext_vgpr_offset_neg2048(i8 addrspa
|
|||
|
||||
; Base pointer is uniform, but also in VGPRs
|
||||
define amdgpu_ps void @global_store_saddr_uniform_ptr_in_vgprs(i32 %voffset, i8 %data) {
|
||||
; GCN-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GCN-NEXT: ds_read_b64 v[2:3], v2
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: v_readfirstlane_b32 s0, v2
|
||||
; GCN-NEXT: v_readfirstlane_b32 s1, v3
|
||||
; GCN-NEXT: s_nop 4
|
||||
; GCN-NEXT: global_store_byte v0, v1, s[0:1]
|
||||
; GCN-NEXT: s_endpgm
|
||||
; GFX9-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX9-NEXT: ds_read_b64 v[2:3], v2
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: v_readfirstlane_b32 s0, v2
|
||||
; GFX9-NEXT: v_readfirstlane_b32 s1, v3
|
||||
; GFX9-NEXT: s_nop 4
|
||||
; GFX9-NEXT: global_store_byte v0, v1, s[0:1]
|
||||
; GFX9-NEXT: s_endpgm
|
||||
;
|
||||
; GFX10-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX10-NEXT: ds_read_b64 v[2:3], v2
|
||||
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-NEXT: v_readfirstlane_b32 s0, v2
|
||||
; GFX10-NEXT: v_readfirstlane_b32 s1, v3
|
||||
; GFX10-NEXT: global_store_byte v0, v1, s[0:1]
|
||||
; GFX10-NEXT: s_endpgm
|
||||
%sbase = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(3)* @ptr.in.lds
|
||||
%zext.offset = zext i32 %voffset to i64
|
||||
%gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
|
||||
|
@ -77,16 +87,26 @@ define amdgpu_ps void @global_store_saddr_uniform_ptr_in_vgprs(i32 %voffset, i8
|
|||
|
||||
; Base pointer is uniform, but also in VGPRs, with imm offset
|
||||
define amdgpu_ps void @global_store_saddr_uniform_ptr_in_vgprs_immoffset(i32 %voffset, i8 %data) {
|
||||
; GCN-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GCN-NEXT: ds_read_b64 v[2:3], v2
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: v_readfirstlane_b32 s0, v2
|
||||
; GCN-NEXT: v_readfirstlane_b32 s1, v3
|
||||
; GCN-NEXT: s_nop 4
|
||||
; GCN-NEXT: global_store_byte v0, v1, s[0:1] offset:-120
|
||||
; GCN-NEXT: s_endpgm
|
||||
; GFX9-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX9-NEXT: ds_read_b64 v[2:3], v2
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: v_readfirstlane_b32 s0, v2
|
||||
; GFX9-NEXT: v_readfirstlane_b32 s1, v3
|
||||
; GFX9-NEXT: s_nop 4
|
||||
; GFX9-NEXT: global_store_byte v0, v1, s[0:1] offset:-120
|
||||
; GFX9-NEXT: s_endpgm
|
||||
;
|
||||
; GFX10-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX10-NEXT: ds_read_b64 v[2:3], v2
|
||||
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-NEXT: v_readfirstlane_b32 s0, v2
|
||||
; GFX10-NEXT: v_readfirstlane_b32 s1, v3
|
||||
; GFX10-NEXT: global_store_byte v0, v1, s[0:1] offset:-120
|
||||
; GFX10-NEXT: s_endpgm
|
||||
%sbase = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(3)* @ptr.in.lds
|
||||
%zext.offset = zext i32 %voffset to i64
|
||||
%gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
|
||||
|
|
|
@ -40,7 +40,7 @@ body: |
|
|||
# GCN-LABEL: name: vmem_vcc_hazard_ignore_bundle_instr
|
||||
# GCN: S_LOAD_DWORDX2_IMM
|
||||
# GCN-NEXT: }
|
||||
# GCN-NEXT: S_NOP 3
|
||||
# GFX9-NEXT: S_NOP 3
|
||||
# GCN: BUFFER_LOAD_DWORD_OFFEN
|
||||
---
|
||||
name: vmem_vcc_hazard_ignore_bundle_instr
|
||||
|
@ -60,7 +60,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: vmem_vcc_min_of_two_after_bundle
|
||||
# GCN: bb.2:
|
||||
# GCN-NEXT: S_NOP 4
|
||||
# GFX9-NEXT: S_NOP 4
|
||||
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
|
||||
---
|
||||
name: vmem_vcc_min_of_two_after_bundle
|
||||
|
|
|
@ -67,7 +67,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: vmem_vcc_hazard_in_bundle
|
||||
# GCN: S_LOAD_DWORDX2_IMM
|
||||
# GCN-NEXT: S_NOP 3
|
||||
# GFX9-NEXT: S_NOP 3
|
||||
# GCN: BUFFER_LOAD_DWORD_OFFEN
|
||||
---
|
||||
name: vmem_vcc_hazard_in_bundle
|
||||
|
|
|
@ -16,7 +16,6 @@
|
|||
; W64: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}}
|
||||
; W64: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP0]]
|
||||
; W64: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], [[AND]]
|
||||
; W64: s_nop 0
|
||||
; W64: buffer_load_format_x [[RES:v[0-9]+]], v{{[0-9]+}}, s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, 0 idxen
|
||||
; W64: s_xor_b64 exec, exec, [[AND]]
|
||||
; W64: s_cbranch_execnz [[LOOPBB]]
|
||||
|
@ -34,7 +33,6 @@
|
|||
; W32: v_cmp_eq_u64_e64 [[CMP0:s[0-9]+]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}}
|
||||
; W32: s_and_b32 [[AND:s[0-9]+]], vcc_lo, [[CMP0]]
|
||||
; W32: s_and_saveexec_b32 [[SAVE:s[0-9]+]], [[AND]]
|
||||
; W32: s_nop 0
|
||||
; W32: buffer_load_format_x [[RES:v[0-9]+]], v{{[0-9]+}}, s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, 0 idxen
|
||||
; W32: s_xor_b32 exec_lo, exec_lo, [[SAVE]]
|
||||
; W32: s_cbranch_execnz [[LOOPBB]]
|
||||
|
@ -59,7 +57,6 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 {
|
|||
; W64: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}}
|
||||
; W64: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP0]]
|
||||
; W64: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], [[AND]]
|
||||
; W64: s_nop 0
|
||||
; W64: buffer_load_format_x [[RES0:v[0-9]+]], v{{[0-9]+}}, s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, 0 idxen
|
||||
; W64: s_xor_b64 exec, exec, [[SAVE]]
|
||||
; W64: s_cbranch_execnz [[LOOPBB0]]
|
||||
|
@ -77,7 +74,6 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 {
|
|||
; W64: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}}
|
||||
; W64: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP0]]
|
||||
; W64: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], [[AND]]
|
||||
; W64: s_nop 0
|
||||
; W64: buffer_load_format_x [[RES1:v[0-9]+]], v{{[0-9]+}}, s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, 0 idxen
|
||||
; W64: s_xor_b64 exec, exec, [[SAVE]]
|
||||
; W64: s_cbranch_execnz [[LOOPBB1]]
|
||||
|
@ -99,7 +95,6 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 {
|
|||
; W32: v_cmp_eq_u64_e64 [[CMP0:s[0-9]+]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}}
|
||||
; W32: s_and_b32 [[AND:s[0-9]+]], vcc_lo, [[CMP0]]
|
||||
; W32: s_and_saveexec_b32 [[SAVE:s[0-9]+]], [[AND]]
|
||||
; W32: s_nop 0
|
||||
; W32: buffer_load_format_x [[RES0:v[0-9]+]], v{{[0-9]+}}, s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, 0 idxen
|
||||
; W32: s_xor_b32 exec_lo, exec_lo, [[SAVE]]
|
||||
; W32: s_cbranch_execnz [[LOOPBB0]]
|
||||
|
@ -117,7 +112,6 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 {
|
|||
; W32: v_cmp_eq_u64_e64 [[CMP0:s[0-9]+]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}}
|
||||
; W32: s_and_b32 [[AND:s[0-9]+]], vcc_lo, [[CMP0]]
|
||||
; W32: s_and_saveexec_b32 [[SAVE:s[0-9]+]], [[AND]]
|
||||
; W32: s_nop 0
|
||||
; W32: buffer_load_format_x [[RES1:v[0-9]+]], v8, s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, 0 idxen
|
||||
; W32: s_xor_b32 exec_lo, exec_lo, [[SAVE]]
|
||||
; W32: s_cbranch_execnz [[LOOPBB1]]
|
||||
|
@ -150,7 +144,6 @@ entry:
|
|||
; W64: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}}
|
||||
; W64: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP0]]
|
||||
; W64: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], [[AND]]
|
||||
; W64: s_nop 0
|
||||
; W64: buffer_load_format_x [[RES:v[0-9]+]], [[IDX]], s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, 0 idxen
|
||||
; W64: s_xor_b64 exec, exec, [[SAVE]]
|
||||
; W64: s_cbranch_execnz [[LOOPBB0]]
|
||||
|
@ -171,7 +164,6 @@ entry:
|
|||
; W64: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}}
|
||||
; W64: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP0]]
|
||||
; W64: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], [[AND]]
|
||||
; W64: s_nop 0
|
||||
; W64: buffer_load_format_x [[RES]], [[IDX]], s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, 0 idxen
|
||||
; W64: s_xor_b64 exec, exec, [[SAVE]]
|
||||
; W64: s_cbranch_execnz [[LOOPBB1]]
|
||||
|
@ -196,7 +188,6 @@ entry:
|
|||
; W32: v_cmp_eq_u64_e64 [[CMP0:s[0-9]+]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}}
|
||||
; W32: s_and_b32 [[AND:s[0-9]+]], vcc_lo, [[CMP0]]
|
||||
; W32: s_and_saveexec_b32 [[SAVE:s[0-9]+]], [[AND]]
|
||||
; W32: s_nop 0
|
||||
; W32: buffer_load_format_x [[RES:v[0-9]+]], [[IDX]], s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, 0 idxen
|
||||
; W32: s_xor_b32 exec_lo, exec_lo, [[SAVE]]
|
||||
; W32: s_cbranch_execnz [[LOOPBB0]]
|
||||
|
@ -217,7 +208,6 @@ entry:
|
|||
; W32: v_cmp_eq_u64_e64 [[CMP0:s[0-9]+]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}}
|
||||
; W32: s_and_b32 [[AND:s[0-9]+]], vcc_lo, [[CMP0]]
|
||||
; W32: s_and_saveexec_b32 [[SAVE:s[0-9]+]], [[AND]]
|
||||
; W32: s_nop 0
|
||||
; W32: buffer_load_format_x [[RES]], [[IDX]], s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, 0 idxen
|
||||
; W32: s_xor_b32 exec_lo, exec_lo, [[SAVE]]
|
||||
; W32: s_cbranch_execnz [[LOOPBB1]]
|
||||
|
|
|
@ -67,7 +67,6 @@ define amdgpu_kernel void @kernel_background_evaluate(float addrspace(5)* %kg, <
|
|||
; FLATSCR-NEXT: s_cbranch_execz BB0_2
|
||||
; FLATSCR-NEXT: ; %bb.1: ; %if.then4.i
|
||||
; FLATSCR-NEXT: s_movk_i32 vcc_lo, 0x4000
|
||||
; FLATSCR-NEXT: s_nop 1
|
||||
; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, vcc_lo offset:4
|
||||
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
|
||||
; FLATSCR-NEXT: v_add_nc_u32_e32 v0, v1, v0
|
||||
|
|
|
@ -26,7 +26,6 @@ define void @vgpr_descriptor_waterfall_loop_idom_update(<4 x i32>* %arg) #0 {
|
|||
; GCN-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3]
|
||||
; GCN-NEXT: s_and_b32 s4, vcc_lo, s4
|
||||
; GCN-NEXT: s_and_saveexec_b32 s4, s4
|
||||
; GCN-NEXT: s_nop 0
|
||||
; GCN-NEXT: buffer_store_dword v0, v0, s[8:11], 0 offen
|
||||
; GCN-NEXT: s_waitcnt_depctr 0xffe3
|
||||
; GCN-NEXT: s_xor_b32 exec_lo, exec_lo, s4
|
||||
|
|
Loading…
Reference in New Issue