[AMDGPU] Skip invariant loads when avoiding WAR conflicts

Invariant loads do not need to be tracked when avoiding WAR conflicts,
because there cannot be a vector store to the same memory location.

Reviewed By: foad

Differential Revision: https://reviews.llvm.org/D101177
This commit is contained in:
Piotr Sobczak 2021-05-12 09:23:59 +02:00
parent cbd93cee9b
commit 68137ef568
4 changed files with 66 additions and 46 deletions

View File

@ -1521,9 +1521,14 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
if (TII->isSMRD(Inst)) {
for (const MachineMemOperand *Memop : Inst.memoperands()) {
// No need to handle invariant loads when avoiding WAR conflicts, as
// there cannot be a vector store to the same memory location.
if (!Memop->isInvariant()) {
const Value *Ptr = Memop->getValue();
assert(Ptr);
SLoadAddresses.insert(std::make_pair(Ptr, Inst.getParent()));
}
}
if (ST->hasReadVCCZBug()) {
// This smem read could complete and clobber vccz at any time.
VCCZCorrect = false;

View File

@ -200,7 +200,6 @@ define void @slsr1_1(i32 %b.arg, i32 %s.arg) #0 {
; GFX9-NEXT: s_addc_u32 s5, s5, foo@gotpcrel32@hi+12
; GFX9-NEXT: v_writelane_b32 v43, s35, 1
; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
@ -210,6 +209,7 @@ define void @slsr1_1(i32 %b.arg, i32 %s.arg) #0 {
; GFX9-NEXT: v_mul_u32_u24_e32 v0, v41, v40
; GFX9-NEXT: v_writelane_b32 v43, s31, 3
; GFX9-NEXT: v_and_b32_e32 v42, 0xffffff, v40
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT: v_mad_u32_u24 v40, v41, v40, v42
; GFX9-NEXT: v_mov_b32_e32 v0, v40

View File

@ -169,7 +169,7 @@ define void @func_call_align1024_bp_gets_vgpr_spill(<32 x i32> %a, i32 %b) #0 {
; GCN-NEXT: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s34
; GCN-DAG: s_add_u32 s32, s32, 0x30000
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32
; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GCN: s_swappc_b64 s[30:31], s[4:5]
; GCN: s_sub_u32 s32, s32, 0x30000
; GCN-NEXT: v_readlane_b32 s33, [[VGPR_REG]], 2

View File

@ -23,3 +23,18 @@ body: |
S_BRANCH %bb.1
...
# Check that the waitcnt pass does *not* insert a waitcnt instruction after S_BUFFER_LOAD.
# The WAR hazard does not apply here, because S_BUFFER_LOAD accesses invariant memory.
...
# CHECK-LABEL: name: waitcnt-no-war-wait
# CHECK: S_WAITCNT 0
# CHECK-NEXT: S_BUFFER_LOAD_DWORD_IMM
# CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact
name: waitcnt-no-war-wait
body: |
bb.0:
renamable $sgpr8 = S_BUFFER_LOAD_DWORD_IMM renamable $sgpr0_sgpr1_sgpr2_sgpr3, 276, 0 :: (dereferenceable invariant load 4)
TBUFFER_STORE_FORMAT_X_OFFEN_exact killed renamable $vgpr0, renamable $vgpr15, renamable $sgpr4_sgpr5_sgpr6_sgpr7, renamable $sgpr9, 0, 116, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
...