llvm-project/llvm/test/CodeGen/AMDGPU/break-smem-soft-clauses.mir

352 lines
12 KiB
YAML

# RUN: llc -march=amdgcn -mcpu=carrizo -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,XNACK %s
# RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,NOXNACK %s
---
# Trivial clause at beginning of program
# A single S_LOAD_DWORD_IMM followed directly by S_ENDPGM. The GCN checks
# (used by both RUN lines) expect the load to be immediately followed by
# S_ENDPGM, i.e. nothing is inserted between them.
name: trivial_smem_clause_load_smrd4_x1
body: |
bb.0:
; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x1
; GCN: %sgpr0 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
; GCN-NEXT: S_ENDPGM
%sgpr0 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
S_ENDPGM
...
---
# Trivial clause at beginning of program
# Two back-to-back S_LOAD_DWORD_IMM instructions. The destinations (%sgpr0,
# %sgpr1) do not overlap either pointer pair (%sgpr10_sgpr11, %sgpr12_sgpr13).
# The checks expect the two loads and S_ENDPGM on consecutive lines for both
# RUN lines.
name: trivial_smem_clause_load_smrd4_x2
body: |
bb.0:
; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x2
; GCN: %sgpr0 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
; GCN-NEXT: %sgpr1 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0
; GCN-NEXT: S_ENDPGM
%sgpr0 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
%sgpr1 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0
S_ENDPGM
...
---
# Trivial clause at beginning of program
# Three back-to-back S_LOAD_DWORD_IMM instructions writing %sgpr0..%sgpr2 from
# pointer pairs %sgpr12_sgpr13, %sgpr6_sgpr7 and %sgpr14_sgpr15; no destination
# overlaps any pointer. The checks expect the sequence to come out unchanged
# with nothing inserted.
name: trivial_smem_clause_load_smrd4_x3
body: |
bb.0:
; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x3
; GCN: %sgpr0 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0
; GCN-NEXT: %sgpr1 = S_LOAD_DWORD_IMM %sgpr6_sgpr7, 0, 0
; GCN-NEXT: %sgpr2 = S_LOAD_DWORD_IMM %sgpr14_sgpr15, 0, 0
; GCN-NEXT: S_ENDPGM
%sgpr0 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0
%sgpr1 = S_LOAD_DWORD_IMM %sgpr6_sgpr7, 0, 0
%sgpr2 = S_LOAD_DWORD_IMM %sgpr14_sgpr15, 0, 0
S_ENDPGM
...
---
# Trivial clause at beginning of program
# Four back-to-back S_LOAD_DWORD_IMM instructions writing %sgpr0..%sgpr3 from
# four different pointer pairs; no destination overlaps any pointer. The
# checks expect the sequence to come out unchanged with nothing inserted.
name: trivial_smem_clause_load_smrd4_x4
body: |
bb.0:
; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x4
; GCN: %sgpr0 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0
; GCN-NEXT: %sgpr1 = S_LOAD_DWORD_IMM %sgpr8_sgpr9, 0, 0
; GCN-NEXT: %sgpr2 = S_LOAD_DWORD_IMM %sgpr14_sgpr15, 0, 0
; GCN-NEXT: %sgpr3 = S_LOAD_DWORD_IMM %sgpr16_sgpr17, 0, 0
; GCN-NEXT: S_ENDPGM
%sgpr0 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0
%sgpr1 = S_LOAD_DWORD_IMM %sgpr8_sgpr9, 0, 0
%sgpr2 = S_LOAD_DWORD_IMM %sgpr14_sgpr15, 0, 0
%sgpr3 = S_LOAD_DWORD_IMM %sgpr16_sgpr17, 0, 0
S_ENDPGM
...
---
# Reuse of same input pointer is OK
# Both loads read the same pointer pair %sgpr10_sgpr11 and write %sgpr12 and
# %sgpr13, neither of which is part of that pointer. The checks expect the
# two loads to stay adjacent with nothing inserted.
name: trivial_smem_clause_load_smrd4_x2_sameptr
body: |
bb.0:
; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x2_sameptr
; GCN: %sgpr12 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
; GCN-NEXT: %sgpr13 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
; GCN-NEXT: S_ENDPGM
%sgpr12 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
%sgpr13 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
S_ENDPGM
...
---
# 32-bit load partially clobbers its own ptr reg
# The single load writes %sgpr10, the first register of its own pointer pair
# %sgpr10_sgpr11. It is the only memory instruction in the function, and the
# checks expect it to be followed directly by S_ENDPGM (no S_NOP).
name: smrd_load4_overwrite_ptr_lo
body: |
bb.0:
; GCN-LABEL: name: smrd_load4_overwrite_ptr_lo
; GCN: %sgpr10 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
; GCN-NEXT: S_ENDPGM
%sgpr10 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
S_ENDPGM
...
---
# 32-bit load partially clobbers its own ptr reg
# The single load writes %sgpr11, the second register of its own pointer pair
# %sgpr10_sgpr11. It is the only memory instruction in the function, and the
# checks expect it to be followed directly by S_ENDPGM (no S_NOP).
name: smrd_load4_overwrite_ptr_hi
body: |
bb.0:
; GCN-LABEL: name: smrd_load4_overwrite_ptr_hi
; GCN: %sgpr11 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
; GCN-NEXT: S_ENDPGM
%sgpr11 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
S_ENDPGM
...
---
# 64-bit load clobbers its own ptr reg
# The single S_LOAD_DWORDX2_IMM writes the same pair %sgpr10_sgpr11 that it
# uses as its pointer. It is the only memory instruction in the function, and
# the checks expect it to be followed directly by S_ENDPGM (no S_NOP).
name: smrd_load8_overwrite_ptr
body: |
bb.0:
; GCN-LABEL: name: smrd_load8_overwrite_ptr
; GCN: %sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM %sgpr10_sgpr11, 0, 0
; GCN-NEXT: S_ENDPGM
%sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM %sgpr10_sgpr11, 0, 0
S_ENDPGM
...
---
# lgkmcnt has 4 bits, so maximum 16 outstanding loads. The waitcnt
# breaks the clause.
# NOTE(review): this comment previously named vmcnt; scalar memory loads are
# presumably tracked by lgkmcnt rather than vmcnt -- confirm against the ISA
# documentation.
# NOTE(review): no S_WAITCNT appears in this test body. The input is 16 loads
# from %sgpr10_sgpr11 into %sgpr13..%sgpr28, a 17th load from %sgpr30_sgpr31
# into %sgpr0, and an S_MOV_B32 that implicitly uses all 16 results. The
# checks expect that exact sequence on consecutive lines with no S_NOP for
# either RUN line.
name: break_smem_clause_at_max_smem_clause_size_smrd_load4
body: |
bb.0:
; GCN-LABEL: name: break_smem_clause_at_max_smem_clause_size_smrd_load4
; GCN: %sgpr13 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
; GCN-NEXT: %sgpr14 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
; GCN-NEXT: %sgpr15 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
; GCN-NEXT: %sgpr16 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
; GCN-NEXT: %sgpr17 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
; GCN-NEXT: %sgpr18 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
; GCN-NEXT: %sgpr19 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
; GCN-NEXT: %sgpr20 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
; GCN-NEXT: %sgpr21 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
; GCN-NEXT: %sgpr22 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
; GCN-NEXT: %sgpr23 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
; GCN-NEXT: %sgpr24 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
; GCN-NEXT: %sgpr25 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
; GCN-NEXT: %sgpr26 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
; GCN-NEXT: %sgpr27 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
; GCN-NEXT: %sgpr28 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
; GCN-NEXT: %sgpr0 = S_LOAD_DWORD_IMM %sgpr30_sgpr31, 0, 0
; GCN-NEXT: %sgpr0 = S_MOV_B32 %sgpr0, implicit %sgpr13, implicit %sgpr14, implicit %sgpr15, implicit %sgpr16, implicit %sgpr17, implicit %sgpr18, implicit %sgpr19, implicit %sgpr20, implicit %sgpr21, implicit %sgpr22, implicit %sgpr23, implicit %sgpr24, implicit %sgpr25, implicit %sgpr26, implicit %sgpr27, implicit %sgpr28
; GCN-NEXT: S_ENDPGM
%sgpr13 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
%sgpr14 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
%sgpr15 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
%sgpr16 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
%sgpr17 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
%sgpr18 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
%sgpr19 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
%sgpr20 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
%sgpr21 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
%sgpr22 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
%sgpr23 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
%sgpr24 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
%sgpr25 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
%sgpr26 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
%sgpr27 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
%sgpr28 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
%sgpr0 = S_LOAD_DWORD_IMM %sgpr30_sgpr31, 0, 0
%sgpr0 = S_MOV_B32 %sgpr0, implicit %sgpr13, implicit %sgpr14, implicit %sgpr15, implicit %sgpr16, implicit %sgpr17, implicit %sgpr18, implicit %sgpr19, implicit %sgpr20, implicit %sgpr21, implicit %sgpr22, implicit %sgpr23, implicit %sgpr24, implicit %sgpr25, implicit %sgpr26, implicit %sgpr27, implicit %sgpr28
S_ENDPGM
...
---
# Two back-to-back loads where the first writes %sgpr10, part of its own
# pointer pair %sgpr10_sgpr11. The XNACK prefix (carrizo RUN line) expects an
# S_NOP 0 between the two loads; the XNACK-NEXT line is not checked on the
# fiji RUN line, so there the GCN-NEXT requires the loads to be adjacent.
name: break_smem_clause_simple_load_smrd4_lo_ptr
body: |
bb.0:
; GCN-LABEL: name: break_smem_clause_simple_load_smrd4_lo_ptr
; GCN: %sgpr10 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: %sgpr12 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0
; GCN-NEXT: S_ENDPGM
%sgpr10 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
%sgpr12 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0
S_ENDPGM
...
---
# Two back-to-back loads writing %sgpr0 and %sgpr3 from pointer pairs
# %sgpr10_sgpr11 and %sgpr12_sgpr13. The checks expect the loads to stay
# adjacent with no S_NOP on either RUN line.
# NOTE(review): the test name suggests a clobber of the high half of a
# pointer, but as written neither destination overlaps either pointer pair --
# confirm whether the registers are what was intended.
name: break_smem_clause_simple_load_smrd4_hi_ptr
body: |
bb.0:
; GCN-LABEL: name: break_smem_clause_simple_load_smrd4_hi_ptr
; GCN: %sgpr0 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
; GCN-NEXT: %sgpr3 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0
; GCN-NEXT: S_ENDPGM
%sgpr0 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
%sgpr3 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0
S_ENDPGM
...
---
# Two back-to-back 64-bit loads where the second writes %sgpr10_sgpr11, the
# pair the first load uses as its pointer. The XNACK prefix (carrizo RUN line)
# expects an S_NOP 0 between them; on the fiji RUN line the loads must be
# adjacent.
name: break_smem_clause_simple_load_smrd8_ptr
body: |
bb.0:
; GCN-LABEL: name: break_smem_clause_simple_load_smrd8_ptr
; GCN: %sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM %sgpr10_sgpr11, 0, 0
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: %sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM %sgpr12_sgpr13, 0, 0
; GCN-NEXT: S_ENDPGM
%sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM %sgpr10_sgpr11, 0, 0
%sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM %sgpr12_sgpr13, 0, 0
S_ENDPGM
...
---
# A 64-bit load from %sgpr10_sgpr11 followed by a 128-bit load from
# %sgpr6_sgpr7 into %sgpr12_sgpr13_sgpr14_sgpr15. The checks expect the two
# loads to stay adjacent with no S_NOP on either RUN line.
# NOTE(review): the test name starts with "break_", but as written neither
# destination overlaps either pointer pair and no break is expected --
# confirm whether the registers are what was intended.
name: break_smem_clause_simple_load_smrd16_ptr
body: |
bb.0:
; GCN-LABEL: name: break_smem_clause_simple_load_smrd16_ptr
; GCN: %sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM %sgpr10_sgpr11, 0, 0
; GCN-NEXT: %sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM %sgpr6_sgpr7, 0, 0
; GCN-NEXT: S_ENDPGM
%sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM %sgpr10_sgpr11, 0, 0
%sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM %sgpr6_sgpr7, 0, 0
S_ENDPGM
...
---
# Same register pattern as a pointer clobber by a following 64-bit load, but
# split across two basic blocks: bb.0 ends with a load that uses
# %sgpr10_sgpr11 as its pointer, and bb.1 begins with a load that writes
# %sgpr10_sgpr11. The XNACK prefix (carrizo RUN line) expects an S_NOP 0 at
# the start of bb.1; on the fiji RUN line the load must directly follow the
# bb.1 label.
name: break_smem_clause_block_boundary_load_smrd8_ptr
body: |
; GCN-LABEL: name: break_smem_clause_block_boundary_load_smrd8_ptr
; GCN: bb.0:
; GCN: successors: %bb.1(0x80000000)
; GCN: %sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM %sgpr10_sgpr11, 0, 0
; GCN: bb.1:
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: %sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM %sgpr12_sgpr13, 0, 0
; GCN-NEXT: S_ENDPGM
bb.0:
%sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM %sgpr10_sgpr11, 0, 0
bb.1:
%sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM %sgpr12_sgpr13, 0, 0
S_ENDPGM
...
---
# The load clobbers the pointer of the store, so it needs to break.
# NOTE(review): as written, the load defines %sgpr12, which does not overlap
# the store's pointer %sgpr10_sgpr11 (or its data %sgpr16), and the checks
# expect the store and load to be adjacent with no S_NOP on either RUN line.
# Confirm whether the comment above or the registers reflect the intent.
name: break_smem_clause_store_load_into_ptr_smrd4
body: |
bb.0:
; GCN-LABEL: name: break_smem_clause_store_load_into_ptr_smrd4
; GCN: S_STORE_DWORD_IMM %sgpr16, %sgpr10_sgpr11, 0, 0
; GCN-NEXT: %sgpr12 = S_LOAD_DWORD_IMM %sgpr14_sgpr15, 0, 0
; GCN-NEXT: S_ENDPGM
S_STORE_DWORD_IMM %sgpr16, %sgpr10_sgpr11, 0, 0
%sgpr12 = S_LOAD_DWORD_IMM %sgpr14_sgpr15, 0, 0
S_ENDPGM
...
---
# The load clobbers the data of the store, so it needs to break.
# FIXME: Would it be better to s_nop and wait later?
# The store's data operand is %sgpr8 and the following load writes %sgpr8.
# NOTE(review): although the comment above says a break is needed, the checks
# expect the store and load to be adjacent with no S_NOP on either RUN line --
# confirm this is the intended expectation.
name: break_smem_clause_store_load_into_data_smrd4
body: |
bb.0:
; GCN-LABEL: name: break_smem_clause_store_load_into_data_smrd4
; GCN: S_STORE_DWORD_IMM %sgpr8, %sgpr10_sgpr11, 0, 0
; GCN-NEXT: %sgpr8 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0
; GCN-NEXT: S_ENDPGM
S_STORE_DWORD_IMM %sgpr8, %sgpr10_sgpr11, 0, 0
%sgpr8 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0
S_ENDPGM
...
---
# Regular VALU instruction breaks clause, no nop needed
# A V_MOV_B32_e32 sits between the two scalar loads. The checks expect the
# three instructions to come out unchanged and consecutive on both RUN lines.
name: valu_inst_breaks_smem_clause
body: |
bb.0:
; GCN-LABEL: name: valu_inst_breaks_smem_clause
; GCN: %sgpr0 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
; GCN-NEXT: %vgpr8 = V_MOV_B32_e32 0, implicit %exec
; GCN-NEXT: %sgpr2 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0
; GCN-NEXT: S_ENDPGM
%sgpr0 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
%vgpr8 = V_MOV_B32_e32 0, implicit %exec
%sgpr2 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0
S_ENDPGM
...
---
# Regular SALU instruction breaks clause, no nop needed
# An S_MOV_B32 sits between the two scalar loads. The checks expect the three
# instructions to come out unchanged and consecutive on both RUN lines.
name: salu_inst_breaks_smem_clause
body: |
bb.0:
; GCN-LABEL: name: salu_inst_breaks_smem_clause
; GCN: %sgpr0 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
; GCN-NEXT: %sgpr8 = S_MOV_B32 0
; GCN-NEXT: %sgpr2 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0
; GCN-NEXT: S_ENDPGM
%sgpr0 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
%sgpr8 = S_MOV_B32 0
%sgpr2 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0
S_ENDPGM
...
---
# A DS_READ_B32 sits between the two scalar loads. The checks expect the
# three instructions to come out unchanged and consecutive, with no S_NOP, on
# both RUN lines.
name: ds_inst_breaks_smem_clause
body: |
bb.0:
; GCN-LABEL: name: ds_inst_breaks_smem_clause
; GCN: %sgpr0 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
; GCN-NEXT: %vgpr8 = DS_READ_B32 %vgpr9, 0, 0, implicit %m0, implicit %exec
; GCN-NEXT: %sgpr2 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0
; GCN-NEXT: S_ENDPGM
%sgpr0 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
%vgpr8 = DS_READ_B32 %vgpr9, 0, 0, implicit %m0, implicit %exec
%sgpr2 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0
S_ENDPGM
...
---
# A FLAT_LOAD_DWORD sits between the two scalar loads. The checks expect the
# three instructions to come out unchanged and consecutive, with no S_NOP, on
# both RUN lines.
name: flat_inst_breaks_smem_clause
body: |
bb.0:
; GCN-LABEL: name: flat_inst_breaks_smem_clause
; GCN: %sgpr0 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
; GCN-NEXT: %vgpr0 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %sgpr2 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0
; GCN-NEXT: S_ENDPGM
%sgpr0 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
%vgpr0 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
%sgpr2 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0
S_ENDPGM
...
---
# FIXME: Should this be handled?
# The first load carries an implicit use of %sgpr12_sgpr13, and the second
# load writes %sgpr12_sgpr13. The XNACK prefix (carrizo RUN line) expects an
# S_NOP 0 between them, i.e. the implicit operand is currently treated like
# any other use; on the fiji RUN line the loads must be adjacent.
name: implicit_use_breaks_smem_clause
body: |
bb.0:
; GCN-LABEL: name: implicit_use_breaks_smem_clause
; GCN: %sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM %sgpr10_sgpr11, 0, 0, implicit %sgpr12_sgpr13
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: %sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM %sgpr6_sgpr7, 0, 0
; GCN-NEXT: S_ENDPGM
%sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM %sgpr10_sgpr11, 0, 0, implicit %sgpr12_sgpr13
%sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM %sgpr6_sgpr7, 0, 0
S_ENDPGM
...