llvm-project/llvm/test/CodeGen/AMDGPU/break-vmem-soft-clauses.mir

581 lines
24 KiB
YAML

# RUN: llc -march=amdgcn -mcpu=carrizo -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,XNACK %s
# RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,NOXNACK %s
---
# Trivial clause at beginning of program
name: trivial_clause_load_flat4_x1
body: |
bb.0:
; GCN-LABEL: name: trivial_clause_load_flat4_x1
; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
%vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
# Trivial clause at beginning of program
name: trivial_clause_load_flat4_x2
body: |
bb.0:
; GCN-LABEL: name: trivial_clause_load_flat4_x2
; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr1 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
%vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr1 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
# Trivial clause at beginning of program
name: trivial_clause_load_flat4_x3
body: |
bb.0:
; GCN-LABEL: name: trivial_clause_load_flat4_x3
; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr3_vgpr4, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr1 = FLAT_LOAD_DWORD %vgpr5_vgpr6, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr2 = FLAT_LOAD_DWORD %vgpr7_vgpr8, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
%vgpr0 = FLAT_LOAD_DWORD %vgpr3_vgpr4, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr1 = FLAT_LOAD_DWORD %vgpr5_vgpr6, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr2 = FLAT_LOAD_DWORD %vgpr7_vgpr8, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
# Trivial clause at beginning of program
name: trivial_clause_load_flat4_x4
body: |
bb.0:
; GCN-LABEL: name: trivial_clause_load_flat4_x4
; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr1 = FLAT_LOAD_DWORD %vgpr6_vgpr7, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr2 = FLAT_LOAD_DWORD %vgpr8_vgpr9, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr3 = FLAT_LOAD_DWORD %vgpr10_vgpr11, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
%vgpr0 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr1 = FLAT_LOAD_DWORD %vgpr6_vgpr7, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr2 = FLAT_LOAD_DWORD %vgpr8_vgpr9, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr3 = FLAT_LOAD_DWORD %vgpr10_vgpr11, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
# Reuse of same input pointer is OK
name: trivial_clause_load_flat4_x2_sameptr
body: |
bb.0:
; GCN-LABEL: name: trivial_clause_load_flat4_x2_sameptr
; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr1 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
%vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr1 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
# 32-bit load partially clobbers its own ptr reg
name: flat_load4_overwrite_ptr_lo
body: |
bb.0:
; GCN-LABEL: name: flat_load4_overwrite_ptr_lo
; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
%vgpr0 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
# 32-bit load partially clobbers its own ptr reg
name: flat_load4_overwrite_ptr_hi
body: |
bb.0:
; GCN-LABEL: name: flat_load4_overwrite_ptr_hi
; GCN: %vgpr1 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
%vgpr1 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
# 64-bit load clobbers its own ptr reg
name: flat_load8_overwrite_ptr
body: |
bb.0:
; GCN-LABEL: name: flat_load8_overwrite_ptr
; GCN: %vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
%vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
# vmcnt has 4 bits, so maximum 16 outstanding loads. The waitcnt
# breaks the clause.
name: break_clause_at_max_clause_size_flat_load4
body: |
bb.0:
; GCN-LABEL: name: break_clause_at_max_clause_size_flat_load4
; GCN: %vgpr2 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr3 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr4 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr5 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr6 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr7 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr8 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr9 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr10 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr11 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr12 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr13 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr14 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr15 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr16 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr17 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %sgpr0 = S_MOV_B32 %sgpr0, implicit %vgpr2, implicit %vgpr3, implicit %vgpr4, implicit %vgpr5, implicit %vgpr6, implicit %vgpr7, implicit %vgpr8, implicit %vgpr9, implicit %vgpr10, implicit %vgpr11, implicit %vgpr12, implicit %vgpr13, implicit %vgpr14, implicit %vgpr15, implicit %vgpr16, implicit %vgpr17, implicit %vgpr18
; GCN-NEXT: S_ENDPGM
%vgpr2 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr3 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr4 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr5 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr6 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr7 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr8 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr9 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr10 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr11 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr12 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr13 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr14 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr15 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr16 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr17 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
%sgpr0 = S_MOV_B32 %sgpr0, implicit %vgpr2, implicit %vgpr3, implicit %vgpr4, implicit %vgpr5, implicit %vgpr6, implicit %vgpr7, implicit %vgpr8, implicit %vgpr9, implicit %vgpr10, implicit %vgpr11, implicit %vgpr12, implicit %vgpr13, implicit %vgpr14, implicit %vgpr15, implicit %vgpr16, implicit %vgpr17, implicit %vgpr18
S_ENDPGM
...
---
name: break_clause_simple_load_flat4_lo_ptr
body: |
bb.0:
; GCN-LABEL: name: break_clause_simple_load_flat4_lo_ptr
; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: %vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
%vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
name: break_clause_simple_load_flat4_hi_ptr
body: |
bb.0:
; GCN-LABEL: name: break_clause_simple_load_flat4_hi_ptr
; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: %vgpr3 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
%vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr3 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
name: break_clause_simple_load_flat8_ptr
body: |
bb.0:
; GCN-LABEL: name: break_clause_simple_load_flat8_ptr
; GCN: %vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: %vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
%vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
name: break_clause_simple_load_flat16_ptr
body: |
bb.0:
; GCN-LABEL: name: break_clause_simple_load_flat16_ptr
; GCN: %vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: %vgpr2_vgpr3_vgpr4_vgpr5 = FLAT_LOAD_DWORDX4 %vgpr6_vgpr7, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
%vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr2_vgpr3_vgpr4_vgpr5 = FLAT_LOAD_DWORDX4 %vgpr6_vgpr7, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
# The clause is broken by the waitcnt inserted at the end of the
# block, so no nop is needed.
name: break_clause_block_boundary_load_flat8_ptr
body: |
; GCN-LABEL: name: break_clause_block_boundary_load_flat8_ptr
; GCN: bb.0:
; GCN-NEXT: successors: %bb.1(0x80000000)
; GCN: %vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN: bb.1:
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: %vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
bb.0:
%vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
bb.1:
%vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
# The load clobbers the pointer of the store, so it needs to break.
name: break_clause_store_load_into_ptr_flat4
body: |
bb.0:
; GCN-LABEL: name: break_clause_store_load_into_ptr_flat4
; GCN: FLAT_STORE_DWORD %vgpr2_vgpr3, %vgpr0, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
FLAT_STORE_DWORD %vgpr2_vgpr3, %vgpr0, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
# The load clobbers the data of the store, so it needs to break.
# FIXME: Would it be better to s_nop and wait later?
name: break_clause_store_load_into_data_flat4
body: |
bb.0:
; GCN-LABEL: name: break_clause_store_load_into_data_flat4
; GCN: FLAT_STORE_DWORD %vgpr2_vgpr3, %vgpr0, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr0 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
FLAT_STORE_DWORD %vgpr2_vgpr3, %vgpr0, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr0 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
# Regular VALU instruction breaks clause, no nop needed
name: valu_inst_breaks_clause
body: |
bb.0:
; GCN-LABEL: name: valu_inst_breaks_clause
; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr8 = V_MOV_B32_e32 0, implicit %exec
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: %vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
%vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr8 = V_MOV_B32_e32 0, implicit %exec
%vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
# Regular SALU instruction breaks clause, no nop needed
name: salu_inst_breaks_clause
body: |
bb.0:
; GCN-LABEL: name: salu_inst_breaks_clause
; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %sgpr8 = S_MOV_B32 0
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: %vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
%vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
%sgpr8 = S_MOV_B32 0
%vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
name: ds_inst_breaks_clause
body: |
bb.0:
; GCN-LABEL: name: ds_inst_breaks_clause
; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr8 = DS_READ_B32 %vgpr9, 0, 0, implicit %m0, implicit %exec
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: %vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
%vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr8 = DS_READ_B32 %vgpr9, 0, 0, implicit %m0, implicit %exec
%vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
name: smrd_inst_breaks_clause
body: |
bb.0:
; GCN-LABEL: name: smrd_inst_breaks_clause
; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %sgpr8 = S_LOAD_DWORD_IMM %sgpr0_sgpr1, 0, 0
; GCN-NEXT: %vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
%vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
%sgpr8 = S_LOAD_DWORD_IMM %sgpr0_sgpr1, 0, 0
%vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
# FIXME: Should this be handled?
name: implicit_use_breaks_clause
body: |
bb.0:
; GCN-LABEL: name: implicit_use_breaks_clause
; GCN: %vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr, implicit %vgpr4_vgpr5
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: %vgpr4_vgpr5 = FLAT_LOAD_DWORDX2 %vgpr6_vgpr7, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
%vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr, implicit %vgpr4_vgpr5
%vgpr4_vgpr5 = FLAT_LOAD_DWORDX2 %vgpr6_vgpr7, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
name: trivial_clause_load_mubuf4_x2
body: |
bb.0:
; GCN-LABEL: name: trivial_clause_load_mubuf4_x2
; GCN: %vgpr1 = BUFFER_LOAD_DWORD_OFFEN %vgpr2, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
; GCN-NEXT: %vgpr3 = BUFFER_LOAD_DWORD_OFFEN %vgpr4, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
; GCN-NEXT: S_ENDPGM
%vgpr1 = BUFFER_LOAD_DWORD_OFFEN %vgpr2, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
%vgpr3 = BUFFER_LOAD_DWORD_OFFEN %vgpr4, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
S_ENDPGM
...
---
name: break_clause_simple_load_mubuf_offen_ptr
body: |
bb.0:
; GCN-LABEL: name: break_clause_simple_load_mubuf_offen_ptr
; GCN: %vgpr1 = BUFFER_LOAD_DWORD_OFFEN %vgpr2, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: %vgpr2 = BUFFER_LOAD_DWORD_OFFEN %vgpr3, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
; GCN-NEXT: S_ENDPGM
%vgpr1 = BUFFER_LOAD_DWORD_OFFEN %vgpr2, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
%vgpr2 = BUFFER_LOAD_DWORD_OFFEN %vgpr3, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
S_ENDPGM
...
---
# BUFFER instructions overwriting their own inputs is supposedly OK.
name: mubuf_load4_overwrite_ptr
body: |
bb.0:
; GCN-LABEL: name: mubuf_load4_overwrite_ptr
; GCN: %vgpr0 = BUFFER_LOAD_DWORD_OFFEN %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
; GCN-NEXT: %vgpr1 = V_MOV_B32_e32 0, implicit %exec
; GCN-NEXT: %vgpr2 = V_MOV_B32_e32 %vgpr0, implicit %exec
; GCN-NEXT: S_ENDPGM
%vgpr0 = BUFFER_LOAD_DWORD_OFFEN %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
%vgpr1 = V_MOV_B32_e32 0, implicit %exec
%vgpr2 = V_MOV_B32_e32 %vgpr0, implicit %exec
S_ENDPGM
...
---
# Break a clause from interference between mubuf and flat instructions
name: break_clause_flat_load_mubuf_load
body: |
bb.0:
; GCN-LABEL: name: break_clause_flat_load_mubuf_load
; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: %vgpr2 = BUFFER_LOAD_DWORD_OFFEN %vgpr1, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
; GCN-NEXT: S_ENDPGM
%vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr2 = BUFFER_LOAD_DWORD_OFFEN %vgpr1, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
S_ENDPGM
...
# Break a clause from interference between mubuf and flat instructions
# GCN-LABEL: name: break_clause_mubuf_load_flat_load
# GCN: bb.0:
# GCN-NEXT: %vgpr0 = BUFFER_LOAD_DWORD_OFFEN %vgpr1, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4
# XNACK-NEXT: S_NOP 0
# GCN-NEXT: %vgpr1 = FLAT_LOAD_DWORD %vgpr2_vgpr3
# GCN-NEXT: S_ENDPGM
name: break_clause_mubuf_load_flat_load
body: |
bb.0:
%vgpr0 = BUFFER_LOAD_DWORD_OFFEN %vgpr1, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
%vgpr1 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
name: break_clause_atomic_rtn_into_ptr_flat4
body: |
bb.0:
; GCN-LABEL: name: break_clause_atomic_rtn_into_ptr_flat4
; GCN: %vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: %vgpr4 = FLAT_ATOMIC_ADD_RTN %vgpr5_vgpr6, %vgpr7, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
%vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr4 = FLAT_ATOMIC_ADD_RTN %vgpr5_vgpr6, %vgpr7, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
name: break_clause_atomic_nortn_ptr_load_flat4
body: |
bb.0:
; GCN-LABEL: name: break_clause_atomic_nortn_ptr_load_flat4
; GCN: FLAT_ATOMIC_ADD %vgpr0_vgpr1, %vgpr2, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr2 = FLAT_LOAD_DWORD %vgpr3_vgpr4, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
FLAT_ATOMIC_ADD %vgpr0_vgpr1, %vgpr2, 0, 0, implicit %exec, implicit %flat_scr
%vgpr2 = FLAT_LOAD_DWORD %vgpr3_vgpr4, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
name: break_clause_atomic_rtn_into_ptr_mubuf4
body: |
bb.0:
; GCN-LABEL: name: break_clause_atomic_rtn_into_ptr_mubuf4
; GCN: %vgpr1 = BUFFER_LOAD_DWORD_OFFEN %vgpr2, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: %vgpr2 = BUFFER_ATOMIC_ADD_OFFEN_RTN %vgpr2, %vgpr5, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, implicit %exec
; GCN-NEXT: S_ENDPGM
%vgpr1 = BUFFER_LOAD_DWORD_OFFEN %vgpr2, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
%vgpr2 = BUFFER_ATOMIC_ADD_OFFEN_RTN %vgpr2, %vgpr5, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, implicit %exec
S_ENDPGM
...
---
name: break_clause_atomic_nortn_ptr_load_mubuf4
body: |
bb.0:
; GCN-LABEL: name: break_clause_atomic_nortn_ptr_load_mubuf4
; GCN: BUFFER_ATOMIC_ADD_OFFEN %vgpr0, %vgpr1, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, implicit %exec
; GCN-NEXT: %vgpr1 = BUFFER_LOAD_DWORD_OFFEN %vgpr2, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
; GCN-NEXT: S_ENDPGM
BUFFER_ATOMIC_ADD_OFFEN %vgpr0, %vgpr1, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, implicit %exec
%vgpr1 = BUFFER_LOAD_DWORD_OFFEN %vgpr2, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
S_ENDPGM
...
---
# Make sure there is no assert on mubuf instructions which do not have
# vaddr, and don't add register to track.
name: no_break_clause_mubuf_load_novaddr
body: |
bb.0:
; GCN-LABEL: name: no_break_clause_mubuf_load_novaddr
; GCN: %vgpr1 = BUFFER_LOAD_DWORD_OFFSET %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
; GCN-NEXT: %vgpr3 = BUFFER_LOAD_DWORD_OFFSET %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
; GCN-NEXT: S_ENDPGM
%vgpr1 = BUFFER_LOAD_DWORD_OFFSET %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
%vgpr3 = BUFFER_LOAD_DWORD_OFFSET %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
S_ENDPGM
...
---
# Loads and stores using different addresses theoretically does not
# need a nop
name: mix_load_store_clause
body: |
bb.0:
; GCN-LABEL: name: mix_load_store_clause
; GCN: FLAT_STORE_DWORD %vgpr0_vgpr1, %vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr10 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: FLAT_STORE_DWORD %vgpr2_vgpr3, %vgpr6, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr11 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
FLAT_STORE_DWORD %vgpr0_vgpr1, %vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr10 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
FLAT_STORE_DWORD %vgpr2_vgpr3, %vgpr6, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr11 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
# Loads and stores using the same address needs a nop.
name: mix_load_store_clause_same_address
body: |
bb.0:
; GCN-LABEL: name: mix_load_store_clause_same_address
; GCN: FLAT_STORE_DWORD %vgpr0_vgpr1, %vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr10 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: FLAT_STORE_DWORD %vgpr0_vgpr1, %vgpr6, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr11 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
FLAT_STORE_DWORD %vgpr0_vgpr1, %vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr10 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
FLAT_STORE_DWORD %vgpr0_vgpr1, %vgpr6, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr11 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...