2018-05-07 22:43:28 +08:00
|
|
|
# Test driver: llc runs only the si-insert-waitcnts pass on this file for
# -march=amdgcn -mcpu=tahiti, and the printed MIR is verified by FileCheck
# against the check lines embedded further down in this same file.
# RUN: llc -run-pass si-insert-waitcnts -march=amdgcn -mcpu=tahiti -o - %s | FileCheck %s
|
2016-11-08 03:09:27 +08:00
|
|
|
--- |
|
|
|
|
|
2017-03-22 05:39:51 +08:00
|
|
|
; IR counterpart of the MIR function with the same name further down.
; A branch on `%cond == 0.0` (annotated as uniform) picks one of two volatile
; stores (9 in %if, 100 in %else); %done then stores a phi of 0 (from %if) or
; 1 (from %else) to %out.
define amdgpu_kernel void @vccz_corrupt_workaround(float %cond, i32 addrspace(1)* %out) #0 {
|
2016-11-08 03:09:27 +08:00
|
|
|
entry:
|
|
|
|
%cmp0 = fcmp oeq float %cond, 0.000000e+00
|
|
|
|
br i1 %cmp0, label %if, label %else, !structurizecfg.uniform !0, !amdgpu.uniform !0
|
|
|
|
|
|
|
|
else: ; preds = %entry
|
|
|
|
store volatile i32 100, i32 addrspace(1)* undef
|
|
|
|
br label %done, !structurizecfg.uniform !0
|
|
|
|
|
|
|
|
if: ; preds = %entry
|
|
|
|
store volatile i32 9, i32 addrspace(1)* undef
|
|
|
|
br label %done, !structurizecfg.uniform !0
|
|
|
|
|
|
|
|
done: ; preds = %if, %else
|
|
|
|
%value = phi i32 [ 0, %if ], [ 1, %else ]
|
|
|
|
store i32 %value, i32 addrspace(1)* %out
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2017-03-22 05:39:51 +08:00
|
|
|
; IR counterpart of the MIR function with the same name further down.
; Same control-flow shape as @vccz_corrupt_workaround, except that the entry
; branch condition is `undef` instead of a compare result; %cond is unused.
define amdgpu_kernel void @vccz_corrupt_undef_vcc(float %cond, i32 addrspace(1)* %out) #0 {
|
2016-11-08 03:09:27 +08:00
|
|
|
entry:
|
|
|
|
br i1 undef, label %if, label %else, !structurizecfg.uniform !0, !amdgpu.uniform !0
|
|
|
|
|
|
|
|
else: ; preds = %entry
|
|
|
|
store volatile i32 100, i32 addrspace(1)* undef
|
|
|
|
br label %done, !structurizecfg.uniform !0
|
|
|
|
|
|
|
|
if: ; preds = %entry
|
|
|
|
store volatile i32 9, i32 addrspace(1)* undef
|
|
|
|
br label %done, !structurizecfg.uniform !0
|
|
|
|
|
|
|
|
done: ; preds = %if, %else
|
|
|
|
%value = phi i32 [ 0, %if ], [ 1, %else ]
|
|
|
|
store i32 %value, i32 addrspace(1)* %out
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; Attribute groups and metadata for the embedded IR module.
; #0 is attached to both kernels above. #1 is not referenced by any function
; visible in this file. !0 is the empty node used as the operand of the
; !structurizecfg.uniform / !amdgpu.uniform branch annotations.
attributes #0 = { nounwind }
|
|
|
|
attributes #1 = { readnone }
|
|
|
|
|
|
|
|
!0 = !{}
|
|
|
|
|
|
|
|
...
|
|
|
|
---
|
|
|
|
# Expected output for vccz_corrupt_workaround. Matching starts at the compare
# that writes $vcc; the check lines after it require the inserted wait and the
# $vcc self-move to follow immediately, ahead of the VCCZ branch.
# CHECK-LABEL: name: vccz_corrupt_workaround
|
2018-02-01 06:04:26 +08:00
|
|
|
# CHECK: $vcc = V_CMP_EQ_F32
|
AMDGPU/InsertWaitcnts: Untangle some semi-global state
Summary:
Reduce the statefulness of the algorithm in two ways:
1. More clearly split generateWaitcntInstBefore into two phases: the
first one which determines the required wait, if any, without changing
the ScoreBrackets, and the second one which actually inserts the wait
and updates the brackets.
2. Communicate pre-existing s_waitcnt instructions using an argument to
generateWaitcntInstBefore instead of through the ScoreBrackets.
To simplify these changes, a Waitcnt structure is introduced which carries
the counts of an s_waitcnt instruction in decoded form.
There are some functional changes:
1. The FIXME for the VCCZ bug workaround was implemented: we only wait for
SMEM instructions as required instead of waiting on all counters.
2. We now properly track pre-existing waitcnt's in all cases, which leads
to less conservative waitcnts being emitted in some cases.
s_load_dword ...
s_waitcnt lgkmcnt(0) <-- pre-existing wait count
ds_read_b32 v0, ...
ds_read_b32 v1, ...
s_waitcnt lgkmcnt(0) <-- this is too conservative
use(v0)
more code
use(v1)
This increases code size a bit, but the reduced latency should still be a
win in basically all cases. The worst code size regressions in my shader-db
are:
WORST REGRESSIONS - Code Size
Before After Delta Percentage
1724 1736 12 0.70 % shaders/private/f1-2015/1334.shader_test [0]
2276 2284 8 0.35 % shaders/private/f1-2015/1306.shader_test [0]
4632 4640 8 0.17 % shaders/private/ue4_elemental/62.shader_test [0]
2376 2384 8 0.34 % shaders/private/f1-2015/1308.shader_test [0]
3284 3292 8 0.24 % shaders/private/talos_principle/1955.shader_test [0]
Reviewers: msearles, rampitec, scott.linder, kanarayan
Subscribers: arsenm, kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits, hakzsam
Differential Revision: https://reviews.llvm.org/D54226
llvm-svn: 347848
2018-11-29 19:06:06 +08:00
|
|
|
# NOTE(review): 127 is 0x7f. Presumably, in the s_waitcnt encoding for this
# subtarget, that leaves vmcnt/expcnt at their maximum and sets lgkmcnt to 0,
# i.e. a wait for outstanding scalar-memory loads only -- confirm against the
# ISA encoding. The self-move of $vcc must come directly after the wait.
# NOTE(review): the branch target is %bb.2 here, while the input below
# branches to %bb.1. Presumably blocks are renumbered in layout order on
# output (the input lists bb.2.if before bb.1.else) -- confirm.
# CHECK-NEXT: S_WAITCNT 127
|
2018-02-01 06:04:26 +08:00
|
|
|
# CHECK-NEXT: $vcc = S_MOV_B64 $vcc
|
|
|
|
# CHECK-NEXT: S_CBRANCH_VCCZ %bb.2, implicit killed $vcc
|
2016-11-08 03:09:27 +08:00
|
|
|
|
|
|
|
# Machine-function record for vccz_corrupt_workaround (same name as the IR
# kernel above). Register liveness is tracked, the only function-level live-in
# is $sgpr0_sgpr1, and every frameInfo field is zero/false. The machine basic
# blocks follow under `body`.
name: vccz_corrupt_workaround
|
|
|
|
alignment: 0
|
|
|
|
exposesReturnsTwice: false
|
|
|
|
legalized: false
|
|
|
|
regBankSelected: false
|
|
|
|
selected: false
|
|
|
|
tracksRegLiveness: true
|
|
|
|
liveins:
|
2018-02-01 06:04:26 +08:00
|
|
|
- { reg: '$sgpr0_sgpr1' }
|
2016-11-08 03:09:27 +08:00
|
|
|
frameInfo:
|
|
|
|
isFrameAddressTaken: false
|
|
|
|
isReturnAddressTaken: false
|
|
|
|
hasStackMap: false
|
|
|
|
hasPatchPoint: false
|
|
|
|
stackSize: 0
|
|
|
|
offsetAdjustment: 0
|
|
|
|
maxAlignment: 0
|
|
|
|
adjustsStack: false
|
|
|
|
hasCalls: false
|
|
|
|
maxCallFrameSize: 0
|
|
|
|
hasOpaqueSPAdjustment: false
|
|
|
|
hasVAStart: false
|
|
|
|
hasMustTailInVarArgFunc: false
|
|
|
|
body: |
|
|
|
|
; Entry block: issues a scalar load (S_LOAD_DWORDX2_IMM) into $sgpr0_sgpr1,
; then writes $vcc with V_CMP_EQ_F32_e64 and branches on VCCZ right away, with
; no wait in the input. The check lines for this function expect the pass to
; put an S_WAITCNT and a $vcc self-move between the compare and the branch.
; Note: the block live-ins list $vcc, although the function-level liveins
; list only $sgpr0_sgpr1.
bb.0.entry:
|
2018-05-07 22:43:28 +08:00
|
|
|
liveins: $sgpr0_sgpr1, $vcc
|
2016-11-08 03:09:27 +08:00
|
|
|
|
2018-09-10 10:54:25 +08:00
|
|
|
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
|
2018-02-01 06:04:26 +08:00
|
|
|
$sgpr7 = S_MOV_B32 61440
|
|
|
|
$sgpr6 = S_MOV_B32 -1
|
2018-05-07 22:43:28 +08:00
|
|
|
$vcc = V_CMP_EQ_F32_e64 0, 0, 0, undef $sgpr2, 0, implicit $exec
|
2018-02-01 06:04:26 +08:00
|
|
|
S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
|
2016-11-08 03:09:27 +08:00
|
|
|
|
|
|
|
; 'if' block: materializes 9 in $vgpr0, stores it (volatile), then overwrites
; $vgpr0 with 0 (the phi value for this path) and branches to bb.3.
; NOTE(review): the store's 4-register operand is $sgpr4_sgpr5_sgpr6_sgpr7,
; but only $sgpr6/$sgpr7 are written in the visible code -- confirm that
; leaving $sgpr4/$sgpr5 undefined is intentional for this test.
bb.2.if:
|
2018-02-01 06:04:26 +08:00
|
|
|
liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
|
2016-11-08 03:09:27 +08:00
|
|
|
|
2018-02-01 06:04:26 +08:00
|
|
|
$vgpr0 = V_MOV_B32_e32 9, implicit $exec
|
|
|
|
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
|
|
|
|
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
2017-12-05 01:18:51 +08:00
|
|
|
S_BRANCH %bb.3
|
2016-11-08 03:09:27 +08:00
|
|
|
|
|
|
|
; 'else' block: materializes 100 in $vgpr0, stores it (volatile), then
; overwrites $vgpr0 with 1 (the phi value for this path). There is no explicit
; terminator; the next block in the listing is bb.3.done.
bb.1.else:
|
2018-02-01 06:04:26 +08:00
|
|
|
liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
|
2016-11-08 03:09:27 +08:00
|
|
|
|
2018-02-01 06:04:26 +08:00
|
|
|
$vgpr0 = V_MOV_B32_e32 100, implicit $exec
|
|
|
|
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
|
|
|
|
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
|
2016-11-08 03:09:27 +08:00
|
|
|
|
|
|
|
; Join block: writes $sgpr3/$sgpr2, which together with the loaded
; $sgpr0_sgpr1 form the 4-register operand of the final store, stores $vgpr0
; (0 or 1 depending on the path taken) to %ir.out, and ends the program.
bb.3.done:
|
2018-02-01 06:04:26 +08:00
|
|
|
liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
|
2016-11-08 03:09:27 +08:00
|
|
|
|
2018-02-01 06:04:26 +08:00
|
|
|
$sgpr3 = S_MOV_B32 61440
|
|
|
|
$sgpr2 = S_MOV_B32 -1
|
|
|
|
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out)
|
[AMDGPU] Add support for immediate operand for S_ENDPGM
Summary:
Add support for immediate operand in S_ENDPGM
Change-Id: I0c56a076a10980f719fb2a8f16407e9c301013f6
Reviewers: alexshap
Subscribers: qcolombet, arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, tpr, t-tye, eraman, arphaman, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59213
llvm-svn: 355902
2019-03-12 17:52:58 +08:00
|
|
|
S_ENDPGM 0
|
2016-11-08 03:09:27 +08:00
|
|
|
|
|
|
|
...
|
|
|
|
---
|
|
|
|
# Expected output for vccz_corrupt_undef_vcc. The checks only look for an
# S_WAITCNT 3855 directly followed by a V_MOV_B32 into $vgpr0; no $vcc
# self-move is checked for in this function.
# NOTE(review): 3855 is 0xf0f. Presumably that encodes a wait on the export
# counter only (expcnt 0, other counters at maximum), placed before $vgpr0 is
# overwritten after a store that read it -- confirm against the ISA encoding.
# CHECK-LABEL: name: vccz_corrupt_undef_vcc
|
2018-05-07 22:43:28 +08:00
|
|
|
# CHECK: S_WAITCNT 3855
|
|
|
|
# CHECK-NEXT: $vgpr0 = V_MOV_B32_e32
|
2016-11-08 03:09:27 +08:00
|
|
|
|
|
|
|
# Machine-function record for vccz_corrupt_undef_vcc (same name as the IR
# kernel above). Register liveness is tracked, the only function-level live-in
# is $sgpr0_sgpr1, and every frameInfo field is zero/false. The machine basic
# blocks follow under `body`.
name: vccz_corrupt_undef_vcc
|
|
|
|
alignment: 0
|
|
|
|
exposesReturnsTwice: false
|
|
|
|
legalized: false
|
|
|
|
regBankSelected: false
|
|
|
|
selected: false
|
|
|
|
tracksRegLiveness: true
|
|
|
|
liveins:
|
2018-02-01 06:04:26 +08:00
|
|
|
- { reg: '$sgpr0_sgpr1' }
|
2016-11-08 03:09:27 +08:00
|
|
|
frameInfo:
|
|
|
|
isFrameAddressTaken: false
|
|
|
|
isReturnAddressTaken: false
|
|
|
|
hasStackMap: false
|
|
|
|
hasPatchPoint: false
|
|
|
|
stackSize: 0
|
|
|
|
offsetAdjustment: 0
|
|
|
|
maxAlignment: 0
|
|
|
|
adjustsStack: false
|
|
|
|
hasCalls: false
|
|
|
|
maxCallFrameSize: 0
|
|
|
|
hasOpaqueSPAdjustment: false
|
|
|
|
hasVAStart: false
|
|
|
|
hasMustTailInVarArgFunc: false
|
|
|
|
body: |
|
|
|
|
; Entry block: same scalar load and S_MOV setup as in vccz_corrupt_workaround,
; but no instruction writes $vcc here; the VCCZ branch reads $vcc as an
; `undef` operand. The check lines for this function do not look for a $vcc
; self-move.
bb.0.entry:
|
2018-02-01 06:04:26 +08:00
|
|
|
liveins: $sgpr0_sgpr1
|
2016-11-08 03:09:27 +08:00
|
|
|
|
2018-09-10 10:54:25 +08:00
|
|
|
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
|
2018-02-01 06:04:26 +08:00
|
|
|
$sgpr7 = S_MOV_B32 61440
|
|
|
|
$sgpr6 = S_MOV_B32 -1
|
|
|
|
S_CBRANCH_VCCZ %bb.1, implicit undef $vcc
|
2016-11-08 03:09:27 +08:00
|
|
|
|
|
|
|
; 'if' block: materializes 9 in $vgpr0, stores it (volatile), then overwrites
; $vgpr0 with 0 (the phi value for this path) and branches to bb.3.
; NOTE(review): presumably the S_WAITCNT 3855 expected by the check lines is
; inserted between the store and the second V_MOV, since $vgpr0 is rewritten
; right after being read as store data -- confirm against actual pass output.
bb.2.if:
|
2018-02-01 06:04:26 +08:00
|
|
|
liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
|
2016-11-08 03:09:27 +08:00
|
|
|
|
2018-02-01 06:04:26 +08:00
|
|
|
$vgpr0 = V_MOV_B32_e32 9, implicit $exec
|
|
|
|
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
|
|
|
|
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
2017-12-05 01:18:51 +08:00
|
|
|
S_BRANCH %bb.3
|
2016-11-08 03:09:27 +08:00
|
|
|
|
|
|
|
; 'else' block: materializes 100 in $vgpr0, stores it (volatile), then
; overwrites $vgpr0 with 1 (the phi value for this path). There is no explicit
; terminator; the next block in the listing is bb.3.done.
bb.1.else:
|
2018-02-01 06:04:26 +08:00
|
|
|
liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
|
2016-11-08 03:09:27 +08:00
|
|
|
|
2018-02-01 06:04:26 +08:00
|
|
|
$vgpr0 = V_MOV_B32_e32 100, implicit $exec
|
|
|
|
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
|
|
|
|
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
|
2016-11-08 03:09:27 +08:00
|
|
|
|
|
|
|
; Join block: writes $sgpr3/$sgpr2, which together with the loaded
; $sgpr0_sgpr1 form the 4-register operand of the final store, stores $vgpr0
; (0 or 1 depending on the path taken) to %ir.out, and ends the program.
bb.3.done:
|
2018-02-01 06:04:26 +08:00
|
|
|
liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
|
2016-11-08 03:09:27 +08:00
|
|
|
|
2018-02-01 06:04:26 +08:00
|
|
|
$sgpr3 = S_MOV_B32 61440
|
|
|
|
$sgpr2 = S_MOV_B32 -1
|
|
|
|
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out)
|
[AMDGPU] Add support for immediate operand for S_ENDPGM
Summary:
Add support for immediate operand in S_ENDPGM
Change-Id: I0c56a076a10980f719fb2a8f16407e9c301013f6
Reviewers: alexshap
Subscribers: qcolombet, arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, tpr, t-tye, eraman, arphaman, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59213
llvm-svn: 355902
2019-03-12 17:52:58 +08:00
|
|
|
S_ENDPGM 0
|
2016-11-08 03:09:27 +08:00
|
|
|
|
|
|
|
...
|