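# Checks where the post-RA hazard recognizer places S_NOP 0 between scalar memory instructions; the nop is expected only under the XNACK check prefix (the carrizo invocation below).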
# RUN: llc -march=amdgcn -mcpu=carrizo -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,XNACK %s
|
|
# RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,NOXNACK %s
|
|
|
|
---
|
|
# Trivial clause at beginning of program
|
|
name: trivial_smem_clause_load_smrd4_x1
|
|
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x1
|
|
; GCN: %sgpr0 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
; GCN-NEXT: S_ENDPGM
|
|
%sgpr0 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
S_ENDPGM
|
|
...
|
|
---
|
|
# Trivial clause at beginning of program
|
|
name: trivial_smem_clause_load_smrd4_x2
|
|
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x2
|
|
; GCN: %sgpr0 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
; GCN-NEXT: %sgpr1 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0
|
|
; GCN-NEXT: S_ENDPGM
|
|
%sgpr0 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
%sgpr1 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0
|
|
S_ENDPGM
|
|
...
|
|
---
|
|
# Trivial clause at beginning of program
|
|
name: trivial_smem_clause_load_smrd4_x3
|
|
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x3
|
|
; GCN: %sgpr0 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0
|
|
; GCN-NEXT: %sgpr1 = S_LOAD_DWORD_IMM %sgpr6_sgpr7, 0, 0
|
|
; GCN-NEXT: %sgpr2 = S_LOAD_DWORD_IMM %sgpr14_sgpr15, 0, 0
|
|
; GCN-NEXT: S_ENDPGM
|
|
%sgpr0 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0
|
|
%sgpr1 = S_LOAD_DWORD_IMM %sgpr6_sgpr7, 0, 0
|
|
%sgpr2 = S_LOAD_DWORD_IMM %sgpr14_sgpr15, 0, 0
|
|
S_ENDPGM
|
|
...
|
|
---
|
|
# Trivial clause at beginning of program
|
|
name: trivial_smem_clause_load_smrd4_x4
|
|
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x4
|
|
; GCN: %sgpr0 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0
|
|
; GCN-NEXT: %sgpr1 = S_LOAD_DWORD_IMM %sgpr8_sgpr9, 0, 0
|
|
; GCN-NEXT: %sgpr2 = S_LOAD_DWORD_IMM %sgpr14_sgpr15, 0, 0
|
|
; GCN-NEXT: %sgpr3 = S_LOAD_DWORD_IMM %sgpr16_sgpr17, 0, 0
|
|
; GCN-NEXT: S_ENDPGM
|
|
%sgpr0 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0
|
|
%sgpr1 = S_LOAD_DWORD_IMM %sgpr8_sgpr9, 0, 0
|
|
%sgpr2 = S_LOAD_DWORD_IMM %sgpr14_sgpr15, 0, 0
|
|
%sgpr3 = S_LOAD_DWORD_IMM %sgpr16_sgpr17, 0, 0
|
|
S_ENDPGM
|
|
...
|
|
---
|
|
# Reuse of same input pointer is OK
|
|
name: trivial_smem_clause_load_smrd4_x2_sameptr
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x2_sameptr
|
|
; GCN: %sgpr12 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
; GCN-NEXT: %sgpr13 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
; GCN-NEXT: S_ENDPGM
|
|
%sgpr12 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
%sgpr13 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
S_ENDPGM
|
|
...
|
|
---
|
|
# 32-bit load clobbers the low half (%sgpr10) of its own ptr reg; no nop is expected
|
|
name: smrd_load4_overwrite_ptr_lo
|
|
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: smrd_load4_overwrite_ptr_lo
|
|
; GCN: %sgpr10 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
; GCN-NEXT: S_ENDPGM
|
|
%sgpr10 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
S_ENDPGM
|
|
...
|
|
---
|
|
# 32-bit load clobbers the high half (%sgpr11) of its own ptr reg; no nop is expected
|
|
name: smrd_load4_overwrite_ptr_hi
|
|
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: smrd_load4_overwrite_ptr_hi
|
|
; GCN: %sgpr11 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
; GCN-NEXT: S_ENDPGM
|
|
%sgpr11 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
S_ENDPGM
|
|
...
|
|
---
|
|
# 64-bit load clobbers its own ptr reg
|
|
name: smrd_load8_overwrite_ptr
|
|
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: smrd_load8_overwrite_ptr
|
|
; GCN: %sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM %sgpr10_sgpr11, 0, 0
|
|
; GCN-NEXT: S_ENDPGM
|
|
%sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM %sgpr10_sgpr11, 0, 0
|
|
S_ENDPGM
|
|
...
|
|
---
|
|
# lgkmcnt (the counter that tracks SMEM) has 4 bits, so maximum 16 outstanding loads. The waitcnt
|
|
# breaks the clause.
|
|
|
|
name: break_smem_clause_at_max_smem_clause_size_smrd_load4
|
|
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: break_smem_clause_at_max_smem_clause_size_smrd_load4
|
|
; GCN: %sgpr13 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
; GCN-NEXT: %sgpr14 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
; GCN-NEXT: %sgpr15 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
; GCN-NEXT: %sgpr16 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
; GCN-NEXT: %sgpr17 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
; GCN-NEXT: %sgpr18 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
; GCN-NEXT: %sgpr19 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
; GCN-NEXT: %sgpr20 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
; GCN-NEXT: %sgpr21 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
; GCN-NEXT: %sgpr22 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
; GCN-NEXT: %sgpr23 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
; GCN-NEXT: %sgpr24 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
; GCN-NEXT: %sgpr25 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
; GCN-NEXT: %sgpr26 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
; GCN-NEXT: %sgpr27 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
; GCN-NEXT: %sgpr28 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
; GCN-NEXT: %sgpr0 = S_LOAD_DWORD_IMM %sgpr30_sgpr31, 0, 0
|
|
; GCN-NEXT: %sgpr0 = S_MOV_B32 %sgpr0, implicit %sgpr13, implicit %sgpr14, implicit %sgpr15, implicit %sgpr16, implicit %sgpr17, implicit %sgpr18, implicit %sgpr19, implicit %sgpr20, implicit %sgpr21, implicit %sgpr22, implicit %sgpr23, implicit %sgpr24, implicit %sgpr25, implicit %sgpr26, implicit %sgpr27, implicit %sgpr28
|
|
; GCN-NEXT: S_ENDPGM
|
|
%sgpr13 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
%sgpr14 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
%sgpr15 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
%sgpr16 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
|
|
%sgpr17 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
%sgpr18 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
%sgpr19 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
%sgpr20 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
|
|
%sgpr21 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
%sgpr22 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
%sgpr23 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
%sgpr24 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
|
|
%sgpr25 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
%sgpr26 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
%sgpr27 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
%sgpr28 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
|
|
%sgpr0 = S_LOAD_DWORD_IMM %sgpr30_sgpr31, 0, 0
|
|
%sgpr0 = S_MOV_B32 %sgpr0, implicit %sgpr13, implicit %sgpr14, implicit %sgpr15, implicit %sgpr16, implicit %sgpr17, implicit %sgpr18, implicit %sgpr19, implicit %sgpr20, implicit %sgpr21, implicit %sgpr22, implicit %sgpr23, implicit %sgpr24, implicit %sgpr25, implicit %sgpr26, implicit %sgpr27, implicit %sgpr28
|
|
S_ENDPGM
|
|
...
|
|
---
|
|
|
|
name: break_smem_clause_simple_load_smrd4_lo_ptr
|
|
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: break_smem_clause_simple_load_smrd4_lo_ptr
|
|
; GCN: %sgpr10 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
; XNACK-NEXT: S_NOP 0
|
|
; GCN-NEXT: %sgpr12 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0
|
|
; GCN-NEXT: S_ENDPGM
|
|
%sgpr10 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
%sgpr12 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0
|
|
S_ENDPGM
|
|
...
|
|
---
|
|
|
|
name: break_smem_clause_simple_load_smrd4_hi_ptr
|
|
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: break_smem_clause_simple_load_smrd4_hi_ptr
|
|
; GCN: %sgpr0 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
; GCN-NEXT: %sgpr3 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0
|
|
; GCN-NEXT: S_ENDPGM
|
|
%sgpr0 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
%sgpr3 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0
|
|
S_ENDPGM
|
|
...
|
|
---
|
|
|
|
name: break_smem_clause_simple_load_smrd8_ptr
|
|
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: break_smem_clause_simple_load_smrd8_ptr
|
|
; GCN: %sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM %sgpr10_sgpr11, 0, 0
|
|
; XNACK-NEXT: S_NOP 0
|
|
; GCN-NEXT: %sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM %sgpr12_sgpr13, 0, 0
|
|
; GCN-NEXT: S_ENDPGM
|
|
%sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM %sgpr10_sgpr11, 0, 0
|
|
%sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM %sgpr12_sgpr13, 0, 0
|
|
S_ENDPGM
|
|
...
|
|
---
|
|
|
|
name: break_smem_clause_simple_load_smrd16_ptr
|
|
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: break_smem_clause_simple_load_smrd16_ptr
|
|
; GCN: %sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM %sgpr10_sgpr11, 0, 0
|
|
; GCN-NEXT: %sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM %sgpr6_sgpr7, 0, 0
|
|
; GCN-NEXT: S_ENDPGM
|
|
%sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM %sgpr10_sgpr11, 0, 0
|
|
%sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM %sgpr6_sgpr7, 0, 0
|
|
S_ENDPGM
|
|
...
|
|
---
|
|
|
|
name: break_smem_clause_block_boundary_load_smrd8_ptr
|
|
|
|
body: |
|
|
; GCN-LABEL: name: break_smem_clause_block_boundary_load_smrd8_ptr
|
|
; GCN: bb.0:
|
|
; GCN: successors: %bb.1(0x80000000)
|
|
; GCN: %sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM %sgpr10_sgpr11, 0, 0
|
|
; GCN: bb.1:
|
|
; XNACK-NEXT: S_NOP 0
|
|
; GCN-NEXT: %sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM %sgpr12_sgpr13, 0, 0
|
|
; GCN-NEXT: S_ENDPGM
|
|
bb.0:
|
|
%sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM %sgpr10_sgpr11, 0, 0
|
|
|
|
bb.1:
|
|
%sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM %sgpr12_sgpr13, 0, 0
|
|
S_ENDPGM
|
|
...
|
|
---
|
|
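# The load is meant to clobber the pointer of the store, so the clause would need to break.
# NOTE(review): as written the load defines %sgpr12 while the store pointer is
# %sgpr10_sgpr11, and no S_NOP is checked between them - confirm the operands.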
|
|
|
|
name: break_smem_clause_store_load_into_ptr_smrd4
|
|
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: break_smem_clause_store_load_into_ptr_smrd4
|
|
; GCN: S_STORE_DWORD_IMM %sgpr16, %sgpr10_sgpr11, 0, 0
|
|
; GCN-NEXT: %sgpr12 = S_LOAD_DWORD_IMM %sgpr14_sgpr15, 0, 0
|
|
; GCN-NEXT: S_ENDPGM
|
|
S_STORE_DWORD_IMM %sgpr16, %sgpr10_sgpr11, 0, 0
|
|
%sgpr12 = S_LOAD_DWORD_IMM %sgpr14_sgpr15, 0, 0
|
|
S_ENDPGM
|
|
...
|
|
---
|
|
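# The load clobbers the data of the store (%sgpr8), so it needs to break.
# NOTE(review): the checks below expect no S_NOP between the store and the load - confirm.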
|
|
# FIXME: Would it be better to s_nop and wait later?
|
|
|
|
name: break_smem_clause_store_load_into_data_smrd4
|
|
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: break_smem_clause_store_load_into_data_smrd4
|
|
; GCN: S_STORE_DWORD_IMM %sgpr8, %sgpr10_sgpr11, 0, 0
|
|
; GCN-NEXT: %sgpr8 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0
|
|
; GCN-NEXT: S_ENDPGM
|
|
S_STORE_DWORD_IMM %sgpr8, %sgpr10_sgpr11, 0, 0
|
|
%sgpr8 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0
|
|
S_ENDPGM
|
|
...
|
|
---
|
|
# Regular VALU instruction breaks clause, no nop needed
|
|
name: valu_inst_breaks_smem_clause
|
|
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: valu_inst_breaks_smem_clause
|
|
; GCN: %sgpr0 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
; GCN-NEXT: %vgpr8 = V_MOV_B32_e32 0, implicit %exec
|
|
; GCN-NEXT: %sgpr2 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0
|
|
; GCN-NEXT: S_ENDPGM
|
|
%sgpr0 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
%vgpr8 = V_MOV_B32_e32 0, implicit %exec
|
|
%sgpr2 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0
|
|
S_ENDPGM
|
|
...
|
|
---
|
|
# Regular SALU instruction breaks clause, no nop needed
|
|
name: salu_inst_breaks_smem_clause
|
|
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: salu_inst_breaks_smem_clause
|
|
; GCN: %sgpr0 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
; GCN-NEXT: %sgpr8 = S_MOV_B32 0
|
|
; GCN-NEXT: %sgpr2 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0
|
|
; GCN-NEXT: S_ENDPGM
|
|
%sgpr0 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
%sgpr8 = S_MOV_B32 0
|
|
%sgpr2 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0
|
|
S_ENDPGM
|
|
...
|
|
---
|
|
name: ds_inst_breaks_smem_clause
|
|
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: ds_inst_breaks_smem_clause
|
|
; GCN: %sgpr0 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
; GCN-NEXT: %vgpr8 = DS_READ_B32 %vgpr9, 0, 0, implicit %m0, implicit %exec
|
|
; GCN-NEXT: %sgpr2 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0
|
|
; GCN-NEXT: S_ENDPGM
|
|
%sgpr0 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
%vgpr8 = DS_READ_B32 %vgpr9, 0, 0, implicit %m0, implicit %exec
|
|
%sgpr2 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0
|
|
S_ENDPGM
|
|
...
|
|
---
|
|
|
|
name: flat_inst_breaks_smem_clause
|
|
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: flat_inst_breaks_smem_clause
|
|
; GCN: %sgpr0 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
; GCN-NEXT: %vgpr0 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
|
|
; GCN-NEXT: %sgpr2 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0
|
|
; GCN-NEXT: S_ENDPGM
|
|
%sgpr0 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0
|
|
%vgpr0 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
|
|
%sgpr2 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0
|
|
S_ENDPGM
|
|
...
|
|
---
|
|
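# FIXME: Should an implicit use be handled? The first load only reads
# %sgpr12_sgpr13 implicitly and the second load overwrites it; an S_NOP 0 is
# currently expected between them under the XNACK check prefix.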
|
|
name: implicit_use_breaks_smem_clause
|
|
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: implicit_use_breaks_smem_clause
|
|
; GCN: %sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM %sgpr10_sgpr11, 0, 0, implicit %sgpr12_sgpr13
|
|
; XNACK-NEXT: S_NOP 0
|
|
; GCN-NEXT: %sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM %sgpr6_sgpr7, 0, 0
|
|
; GCN-NEXT: S_ENDPGM
|
|
%sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM %sgpr10_sgpr11, 0, 0, implicit %sgpr12_sgpr13
|
|
%sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM %sgpr6_sgpr7, 0, 0
|
|
S_ENDPGM
|
|
...
|