2019-11-26 17:46:19 +08:00
|
|
|
# RUN: llc -run-pass=si-insert-waitcnts -march=amdgcn -mcpu=tahiti -o - %s | FileCheck %s -check-prefixes=CHECK,SI
|
|
|
|
# RUN: llc -run-pass=si-insert-waitcnts -march=amdgcn -mcpu=gfx900 -o - %s | FileCheck %s -check-prefixes=CHECK,GFX9
|
|
|
|
# RUN: llc -run-pass=si-insert-waitcnts -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -o - %s | FileCheck %s -check-prefixes=CHECK,GFX10
|
2016-11-08 03:09:27 +08:00
|
|
|
---
# vccz_corrupt_workaround: bb.0 issues an S_LOAD_DWORDX2_IMM, then writes $vcc
# with V_CMP_EQ_F32_e64 and branches on it with S_CBRANCH_VCCZ.
# Expected output: under the SI prefix only, "S_WAITCNT 127" followed by
# "$vcc = S_MOV_B64 $vcc" must sit between the compare and the branch; under
# the other prefixes the branch must be the line right after the compare.
# NOTE(review): the input branch targets %bb.1 while the check expects %bb.2,
# presumably because bb.1 is third in layout order and blocks are renumbered
# when printed - confirm.
|
|
|
|
# CHECK-LABEL: name: vccz_corrupt_workaround
|
2018-02-01 06:04:26 +08:00
|
|
|
# CHECK: $vcc = V_CMP_EQ_F32
|
2019-11-26 17:46:19 +08:00
|
|
|
# SI-NEXT: S_WAITCNT 127
|
|
|
|
# SI-NEXT: $vcc = S_MOV_B64 $vcc
|
2018-02-01 06:04:26 +08:00
|
|
|
# CHECK-NEXT: S_CBRANCH_VCCZ %bb.2, implicit killed $vcc
|
2016-11-08 03:09:27 +08:00
|
|
|
|
2019-11-26 17:46:19 +08:00
|
|
|
name: vccz_corrupt_workaround
|
2016-11-08 03:09:27 +08:00
|
|
|
tracksRegLiveness: true
|
2019-11-26 17:46:19 +08:00
|
|
|
body: |
|
|
|
|
bb.0:
|
|
|
|
liveins: $sgpr0_sgpr1
|
|
|
|
|
|
|
|
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0, 0
|
2018-02-01 06:04:26 +08:00
|
|
|
$sgpr7 = S_MOV_B32 61440
|
|
|
|
$sgpr6 = S_MOV_B32 -1
|
2020-05-28 01:25:37 +08:00
|
|
|
$vcc = V_CMP_EQ_F32_e64 0, 0, 0, undef $sgpr2, 0, implicit $mode, implicit $exec
|
2018-02-01 06:04:26 +08:00
|
|
|
S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
|
2016-11-08 03:09:27 +08:00
|
|
|
|
2019-11-26 17:46:19 +08:00
|
|
|
bb.2:
|
2018-02-01 06:04:26 +08:00
|
|
|
liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
|
2016-11-08 03:09:27 +08:00
|
|
|
|
2018-02-01 06:04:26 +08:00
|
|
|
$vgpr0 = V_MOV_B32_e32 9, implicit $exec
|
2019-11-26 17:46:19 +08:00
|
|
|
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
2018-02-01 06:04:26 +08:00
|
|
|
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
2017-12-05 01:18:51 +08:00
|
|
|
S_BRANCH %bb.3
|
2016-11-08 03:09:27 +08:00
|
|
|
|
2019-11-26 17:46:19 +08:00
|
|
|
bb.1:
|
2018-02-01 06:04:26 +08:00
|
|
|
liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
|
2016-11-08 03:09:27 +08:00
|
|
|
|
2018-02-01 06:04:26 +08:00
|
|
|
$vgpr0 = V_MOV_B32_e32 100, implicit $exec
|
2019-11-26 17:46:19 +08:00
|
|
|
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
2018-02-01 06:04:26 +08:00
|
|
|
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
|
2016-11-08 03:09:27 +08:00
|
|
|
|
2019-11-26 17:46:19 +08:00
|
|
|
bb.3:
|
2018-02-01 06:04:26 +08:00
|
|
|
liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
|
2016-11-08 03:09:27 +08:00
|
|
|
|
2018-02-01 06:04:26 +08:00
|
|
|
$sgpr3 = S_MOV_B32 61440
|
|
|
|
$sgpr2 = S_MOV_B32 -1
|
2019-11-26 17:46:19 +08:00
|
|
|
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
[AMDGPU] Add support for immediate operand for S_ENDPGM
Summary:
Add support for immediate operand in S_ENDPGM
Change-Id: I0c56a076a10980f719fb2a8f16407e9c301013f6
Reviewers: alexshap
Subscribers: qcolombet, arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, tpr, t-tye, eraman, arphaman, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59213
llvm-svn: 355902
2019-03-12 17:52:58 +08:00
|
|
|
S_ENDPGM 0
|
2016-11-08 03:09:27 +08:00
|
|
|
|
|
|
|
...
|
|
|
|
---
# vccz_corrupt_undef_vcc: bb.0 has no compare; its S_CBRANCH_VCCZ reads $vcc
# as "implicit undef".
# The checks only pin what follows the first BUFFER_STORE_DWORD_OFFSET: under
# the SI prefix an "S_WAITCNT 3855" on the next line, then on every prefix the
# $vgpr0 = V_MOV_B32_e32. No "$vcc = S_MOV_B64 $vcc" is checked for in this
# function.
|
|
|
|
# CHECK-LABEL: name: vccz_corrupt_undef_vcc
|
2019-11-26 17:46:19 +08:00
|
|
|
# CHECK: BUFFER_STORE_DWORD_OFFSET
|
|
|
|
# SI-NEXT: S_WAITCNT 3855
|
2018-05-07 22:43:28 +08:00
|
|
|
# CHECK-NEXT: $vgpr0 = V_MOV_B32_e32
|
2016-11-08 03:09:27 +08:00
|
|
|
|
2019-11-26 17:46:19 +08:00
|
|
|
name: vccz_corrupt_undef_vcc
|
2016-11-08 03:09:27 +08:00
|
|
|
tracksRegLiveness: true
|
2019-11-26 17:46:19 +08:00
|
|
|
body: |
|
|
|
|
bb.0:
|
2018-02-01 06:04:26 +08:00
|
|
|
liveins: $sgpr0_sgpr1
|
2016-11-08 03:09:27 +08:00
|
|
|
|
2019-11-26 17:46:19 +08:00
|
|
|
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0, 0
|
2018-02-01 06:04:26 +08:00
|
|
|
$sgpr7 = S_MOV_B32 61440
|
|
|
|
$sgpr6 = S_MOV_B32 -1
|
|
|
|
S_CBRANCH_VCCZ %bb.1, implicit undef $vcc
|
2016-11-08 03:09:27 +08:00
|
|
|
|
2019-11-26 17:46:19 +08:00
|
|
|
bb.2:
|
2018-02-01 06:04:26 +08:00
|
|
|
liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
|
2016-11-08 03:09:27 +08:00
|
|
|
|
2018-02-01 06:04:26 +08:00
|
|
|
$vgpr0 = V_MOV_B32_e32 9, implicit $exec
|
2019-11-26 17:46:19 +08:00
|
|
|
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
2018-02-01 06:04:26 +08:00
|
|
|
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
2017-12-05 01:18:51 +08:00
|
|
|
S_BRANCH %bb.3
|
2016-11-08 03:09:27 +08:00
|
|
|
|
2019-11-26 17:46:19 +08:00
|
|
|
bb.1:
|
2018-02-01 06:04:26 +08:00
|
|
|
liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
|
2016-11-08 03:09:27 +08:00
|
|
|
|
2018-02-01 06:04:26 +08:00
|
|
|
$vgpr0 = V_MOV_B32_e32 100, implicit $exec
|
2019-11-26 17:46:19 +08:00
|
|
|
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
2018-02-01 06:04:26 +08:00
|
|
|
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
|
2016-11-08 03:09:27 +08:00
|
|
|
|
2019-11-26 17:46:19 +08:00
|
|
|
bb.3:
|
2018-02-01 06:04:26 +08:00
|
|
|
liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
|
2016-11-08 03:09:27 +08:00
|
|
|
|
2018-02-01 06:04:26 +08:00
|
|
|
$sgpr3 = S_MOV_B32 61440
|
|
|
|
$sgpr2 = S_MOV_B32 -1
|
2019-11-26 17:46:19 +08:00
|
|
|
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
[AMDGPU] Add support for immediate operand for S_ENDPGM
Summary:
Add support for immediate operand in S_ENDPGM
Change-Id: I0c56a076a10980f719fb2a8f16407e9c301013f6
Reviewers: alexshap
Subscribers: qcolombet, arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, tpr, t-tye, eraman, arphaman, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59213
llvm-svn: 355902
2019-03-12 17:52:58 +08:00
|
|
|
S_ENDPGM 0
|
2016-11-08 03:09:27 +08:00
|
|
|
|
|
|
|
...
|
[AMDGPU] Fix vccz after v_readlane/v_readfirstlane to vcc_lo/hi
Summary:
Up to gfx9, writes to vcc_lo and vcc_hi by instructions like
v_readlane and v_readfirstlane do not update vccz to reflect the new
value of vcc. Fix it by reusing part of the existing vccz bug handling
code, which inserts an "s_mov_b64 vcc, vcc" instruction to restore vccz
just before an instruction that needs the correct value.
Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D69661
2019-10-30 20:18:51 +08:00
|
|
|
---
|
|
|
|
# Test that after reloading vcc spilled to a vgpr, we insert any necessary
|
|
|
|
# instructions to fix vccz.
# Input: $vcc_lo and $vcc_hi are each written by a V_READLANE_B32 of $vgpr0
# (immediate operands 8 and 9), immediately followed by S_CBRANCH_VCCNZ.
# Expected: under the SI and GFX9 prefixes a "$vcc = S_MOV_B64 $vcc" is matched
# after the second readlane and the branch must be the very next line; under
# the remaining prefix the branch must directly follow the second readlane.
|
|
|
|
|
|
|
|
# CHECK-LABEL: name: reload_vcc_from_vgpr
|
2020-10-29 20:10:56 +08:00
|
|
|
# CHECK: $vcc_lo = V_READLANE_B32 $vgpr0, 8, implicit-def $vcc
|
|
|
|
# CHECK: $vcc_hi = V_READLANE_B32 $vgpr0, 9
|
[AMDGPU] Fix vccz after v_readlane/v_readfirstlane to vcc_lo/hi
Summary:
Up to gfx9, writes to vcc_lo and vcc_hi by instructions like
v_readlane and v_readfirstlane do not update vccz to reflect the new
value of vcc. Fix it by reusing part of the existing vccz bug handling
code, which inserts an "s_mov_b64 vcc, vcc" instruction to restore vccz
just before an instruction that needs the correct value.
Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D69661
2019-10-30 20:18:51 +08:00
|
|
|
# SI: $vcc = S_MOV_B64 $vcc
|
|
|
|
# GFX9: $vcc = S_MOV_B64 $vcc
|
|
|
|
# CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
|
|
|
|
|
|
|
|
name: reload_vcc_from_vgpr
|
|
|
|
body: |
|
|
|
|
bb.0:
|
2020-10-29 20:10:56 +08:00
|
|
|
$vcc_lo = V_READLANE_B32 $vgpr0, 8, implicit-def $vcc
|
|
|
|
$vcc_hi = V_READLANE_B32 $vgpr0, 9
|
[AMDGPU] Fix vccz after v_readlane/v_readfirstlane to vcc_lo/hi
Summary:
Up to gfx9, writes to vcc_lo and vcc_hi by instructions like
v_readlane and v_readfirstlane do not update vccz to reflect the new
value of vcc. Fix it by reusing part of the existing vccz bug handling
code, which inserts an "s_mov_b64 vcc, vcc" instruction to restore vccz
just before an instruction that needs the correct value.
Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D69661
2019-10-30 20:18:51 +08:00
|
|
|
S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
|
|
|
|
bb.1:
|
|
|
|
|
|
|
|
...
|
|
|
|
---
|
|
|
|
# Test that after reloading vcc spilled to memory, we insert any necessary
|
|
|
|
# instructions to fix vccz.
# Input: two BUFFER_LOAD_DWORD_OFFSET loads into $vgpr0 (immediate operands 4
# and 8), each followed by a V_READFIRSTLANE_B32 that writes $vcc_lo and
# $vcc_hi respectively, then S_CBRANCH_VCCNZ.
# Expected: under the SI and GFX9 prefixes a "$vcc = S_MOV_B64 $vcc" is matched
# after the second readfirstlane and the branch must be the very next line;
# under the remaining prefix the branch must directly follow the second
# readfirstlane.
|
|
|
|
|
|
|
|
# CHECK-LABEL: name: reload_vcc_from_mem
|
|
|
|
# CHECK: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, 0, 0, 0, implicit $exec
|
|
|
|
# CHECK: $vcc_lo = V_READFIRSTLANE_B32 killed $vgpr0, implicit $exec, implicit-def $vcc
|
|
|
|
# CHECK: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, 0, 0, 0, implicit $exec
|
|
|
|
# CHECK: $vcc_hi = V_READFIRSTLANE_B32 killed $vgpr0, implicit $exec, implicit-def $vcc
|
|
|
|
# SI: $vcc = S_MOV_B64 $vcc
|
|
|
|
# GFX9: $vcc = S_MOV_B64 $vcc
|
|
|
|
# CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
|
|
|
|
|
|
|
|
name: reload_vcc_from_mem
|
|
|
|
body: |
|
|
|
|
bb.0:
|
|
|
|
$vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, 0, 0, 0, implicit $exec
|
|
|
|
$vcc_lo = V_READFIRSTLANE_B32 killed $vgpr0, implicit $exec, implicit-def $vcc
|
|
|
|
$vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, 0, 0, 0, implicit $exec
|
|
|
|
$vcc_hi = V_READFIRSTLANE_B32 killed $vgpr0, implicit $exec, implicit-def $vcc
|
|
|
|
S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
|
|
|
|
bb.1:
|
|
|
|
|
|
|
|
...
|
|
|
|
---
|
|
|
|
# Test that after inline asm that defines vcc_lo, we insert any necessary
|
|
|
|
# instructions to fix vccz.
# Input: an INLINEASM whose only register operand is "implicit-def $vcc_lo",
# immediately followed by S_CBRANCH_VCCNZ.
# Expected: the INLINEASM is printed with operand comments after its two
# immediates (the input carries the bare values 1 and 10). Under the SI and
# GFX9 prefixes a "$vcc = S_MOV_B64 $vcc" is matched after it and the branch
# must be the very next line; under the remaining prefix the branch must
# directly follow the INLINEASM.
|
|
|
|
|
|
|
|
# CHECK-LABEL: name: inlineasm_def_vcc_lo
|
[MIR] Add comments to INLINEASM immediate flag MachineOperands
Summary:
The INLINEASM MIR instructions use immediate operands to encode the values of some operands.
The MachineInstr pretty printer function already handles those operands and prints human readable annotations instead of the immediates. This patch adds similar annotations to the output of the MIRPrinter, however uses the new MIROperandComment feature.
Reviewers: SjoerdMeijer, arsenm, efriedma
Reviewed By: arsenm
Subscribers: qcolombet, sdardis, jvesely, wdng, nhaehnle, hiraditya, jrtc27, atanasyan, kerbowa, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78088
2020-04-14 15:24:40 +08:00
|
|
|
# CHECK: INLINEASM &"; def vcc_lo", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vcc_lo
|
[AMDGPU] Fix vccz after v_readlane/v_readfirstlane to vcc_lo/hi
Summary:
Up to gfx9, writes to vcc_lo and vcc_hi by instructions like
v_readlane and v_readfirstlane do not update vccz to reflect the new
value of vcc. Fix it by reusing part of the existing vccz bug handling
code, which inserts an "s_mov_b64 vcc, vcc" instruction to restore vccz
just before an instruction that needs the correct value.
Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D69661
2019-10-30 20:18:51 +08:00
|
|
|
# SI: $vcc = S_MOV_B64 $vcc
|
|
|
|
# GFX9: $vcc = S_MOV_B64 $vcc
|
|
|
|
# CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
|
|
|
|
|
|
|
|
name: inlineasm_def_vcc_lo
|
|
|
|
body: |
|
|
|
|
bb.0:
|
|
|
|
INLINEASM &"; def vcc_lo", 1, 10, implicit-def $vcc_lo
|
|
|
|
S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
|
|
|
|
bb.1:
|
|
|
|
|
|
|
|
...
|
|
|
|
---
|
|
|
|
# Test that after inline asm that defines vcc, no unnecessary instructions are
|
|
|
|
# inserted to fix vccz.
# Input: an INLINEASM whose only register operand is "implicit-def $vcc" (the
# full register rather than $vcc_lo), immediately followed by S_CBRANCH_VCCNZ.
# Expected: on every prefix the branch must be the line right after the
# INLINEASM, so nothing may be inserted between the two. The INLINEASM is
# printed with operand comments after its two immediates (the input carries
# the bare values 1 and 10).
|
|
|
|
|
|
|
|
# CHECK-LABEL: name: inlineasm_def_vcc
|
[MIR] Add comments to INLINEASM immediate flag MachineOperands
Summary:
The INLINEASM MIR instructions use immediate operands to encode the values of some operands.
The MachineInstr pretty printer function already handles those operands and prints human readable annotations instead of the immediates. This patch adds similar annotations to the output of the MIRPrinter, however uses the new MIROperandComment feature.
Reviewers: SjoerdMeijer, arsenm, efriedma
Reviewed By: arsenm
Subscribers: qcolombet, sdardis, jvesely, wdng, nhaehnle, hiraditya, jrtc27, atanasyan, kerbowa, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78088
2020-04-14 15:24:40 +08:00
|
|
|
# CHECK: INLINEASM &"; def vcc", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vcc
|
[AMDGPU] Fix vccz after v_readlane/v_readfirstlane to vcc_lo/hi
Summary:
Up to gfx9, writes to vcc_lo and vcc_hi by instructions like
v_readlane and v_readfirstlane do not update vccz to reflect the new
value of vcc. Fix it by reusing part of the existing vccz bug handling
code, which inserts an "s_mov_b64 vcc, vcc" instruction to restore vccz
just before an instruction that needs the correct value.
Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D69661
2019-10-30 20:18:51 +08:00
|
|
|
# CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
|
|
|
|
|
|
|
|
name: inlineasm_def_vcc
|
|
|
|
body: |
|
|
|
|
bb.0:
|
|
|
|
INLINEASM &"; def vcc", 1, 10, implicit-def $vcc
|
|
|
|
S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
|
|
|
|
bb.1:
|
|
|
|
|
|
|
|
...
|