; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VCCZ-BUG %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VCCZ-BUG %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NOVCCZ-BUG %s
|
; GCN-FUNC: {{^}}vccz_workaround:
; GCN: s_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0x0
; NOTE: Context from the change that motivated these checks
; ("[AMDGPU] Fixed incorrect uniform branch condition", D41292, llvm-svn 322119):
; multiple nested uniform ifs could produce v_cmp results combined with
; s_and_b64/s_or_b64/s_xor_b64 and branched on via s_cbranch_vccnz without
; first clearing bits for inactive lanes. SILowerControlFlow now inserts
; "s_and_b64 vcc, exec, vcc" for branches selected directly as
; s_cbranch_vccnz, mirroring the existing handling in SIFixSGPRCopies for
; branches converted from s_cbranch_scc1.
; GCN: v_cmp_neq_f32_e64 {{[^,]*}}, s{{[0-9]+}}, 0{{$}}
; VCCZ-BUG: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VCCZ-BUG: s_mov_b64 vcc, vcc
; NOVCCZ-BUG-NOT: s_mov_b64 vcc, vcc
; GCN: s_cbranch_vccnz [[EXIT:[0-9A-Za-z_]+]]
; GCN: buffer_store_dword
; GCN: [[EXIT]]:
; GCN: s_endpgm
; Uniform branch: the condition compares a kernel argument (SGPR value), so
; the branch is selected on a scalar condition and is subject to the SI/CI
; vccz hardware bug workaround (hence the s_waitcnt + "s_mov_b64 vcc, vcc"
; checked above on VCCZ-BUG targets).
define amdgpu_kernel void @vccz_workaround(i32 addrspace(4)* %in, i32 addrspace(1)* %out, float %cond) {
entry:
  %cnd = fcmp oeq float 0.0, %cond
  ; Volatile scalar load keeps an outstanding lgkm count at the branch.
  %sgpr = load volatile i32, i32 addrspace(4)* %in
  br i1 %cnd, label %if, label %endif

if:
  store i32 %sgpr, i32 addrspace(1)* %out
  br label %endif

endif:
  ret void
}
; GCN-FUNC: {{^}}vccz_noworkaround:
; GCN: v_cmp_neq_f32_e32 vcc, 0, v{{[0-9]+}}
; GCN: s_cbranch_vccnz [[EXIT:[0-9A-Za-z_]+]]
; GCN: buffer_store_dword
; GCN: [[EXIT]]:
; GCN: s_endpgm
; Divergent branch: the condition compares a loaded VGPR value, so vcc is
; written directly by v_cmp and no vccz-bug workaround sequence is needed
; (no "s_mov_b64 vcc, vcc" is checked for this function).
define amdgpu_kernel void @vccz_noworkaround(float addrspace(1)* %in, float addrspace(1)* %out) {
entry:
  ; Volatile vector load produces a per-lane (divergent) value.
  %vgpr = load volatile float, float addrspace(1)* %in
  %cnd = fcmp oeq float 0.0, %vgpr
  br i1 %cnd, label %if, label %endif

if:
  store float %vgpr, float addrspace(1)* %out
  br label %endif

endif:
  ret void
}