llvm-project/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll

; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s

; FUNC-LABEL: {{^}}break_inserted_outside_of_loop:

; SI: [[LOOP_LABEL:[A-Z0-9]+]]:
; Lowered break instruction:
; SI: s_or_b64
; Lowered Loop instruction:
; SI: s_andn2_b64
; s_cbranch_execnz [[LOOP_LABEL]]
; SI: s_endpgm
; Kernel whose only loop (the ENDIF block) exits on a condition that is
; computed once in main_body, before the loop is entered.
define amdgpu_kernel void @break_inserted_outside_of_loop(i32 addrspace(1)* %out, i32 %a) {
main_body:
  ; NOTE(review): mbcnt.lo is presumably used so the condition differs per lane
  ; and the branch is not treated as uniform -- confirm.
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %0 = and i32 %a, %tid
  ; Bit 0 of (%a & %tid) is the loop-exit condition used in ENDIF.
  %1 = trunc i32 %0 to i1
  br label %ENDIF

; Loop exit: store 0 through %out and return.
ENDLOOP:
  store i32 0, i32 addrspace(1)* %out
  ret void

; Single-block loop: branches back to itself until %1 is true. %1 is defined
; outside this block, so the exit condition never changes between iterations.
ENDIF:
  br i1 %1, label %ENDLOOP, label %ENDIF
}


; FUNC-LABEL: {{^}}phi_cond_outside_loop:
; FIXME: This could be folded into the s_or_b64 instruction
; SI: s_mov_b64 [[ZERO:s\[[0-9]+:[0-9]+\]]], 0
; SI: [[LOOP_LABEL:[A-Z0-9]+]]
; SI: v_cmp_ne_u32_e32 vcc, 0, v{{[0-9]+}}

; SI_IF_BREAK instruction:
; SI: s_or_b64 [[BREAK:s\[[0-9]+:[0-9]+\]]], vcc, [[ZERO]]

; SI_LOOP instruction:
; SI: s_andn2_b64 exec, exec, [[BREAK]]
; SI: s_cbranch_execnz [[LOOP_LABEL]]
; SI: s_endpgm

; Kernel whose loop-exit condition is a phi (%2) defined in the endif block,
; outside the single-block loop that consumes it.
define amdgpu_kernel void @phi_cond_outside_loop(i32 %b) {
entry:
  ; NOTE(review): mbcnt.lo is presumably used so the if/else branch differs per
  ; lane and is not treated as uniform -- confirm.
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %0 = icmp eq i32 %tid , 0
  br i1 %0, label %if, label %else

; Taken when %tid == 0; contributes the constant false (0) to the phi in endif.
if:
  br label %endif

; Taken when %tid != 0; contributes (%b == 0) to the phi in endif.
else:
  %1 = icmp eq i32 %b, 0
  br label %endif

; Merge point: %2 is false when coming from 'if' and %1 when coming from 'else'.
endif:
  %2 = phi i1 [0, %if], [%1, %else]
  br label %loop

; Single-block loop: repeats until %2 is true. %2 is defined before the loop,
; so the condition is the same on every iteration.
loop:
  br i1 %2, label %exit, label %loop

exit:
  ret void
}

; FIXME: should emit s_endpgm
; Every switch destination in this kernel ends in 'unreachable'. As the FIXME
; above records, no s_endpgm is emitted for it today, so the directives below
; pin its absence between the function label and the function-end label.
; These directives use the prefixes enabled by the RUN lines at the top of the
; file; with any other prefix FileCheck silently ignores them.
; NOTE(review): the '2' in the end label presumably reflects this being the
; third function defined in the file -- update it if functions are reordered.
; FUNC-LABEL: {{^}}switch_unreachable:
; SI-NOT: s_endpgm
; SI: .Lfunc_end2
define amdgpu_kernel void @switch_unreachable(i32 addrspace(1)* %g, i8 addrspace(3)* %l, i32 %x) nounwind {
centry:
  ; Cases 0 and 60 go to sw.bb; every other value of %x goes to sw.default.
  switch i32 %x, label %sw.default [
    i32 0, label %sw.bb
    i32 60, label %sw.bb
  ]

sw.bb:
  unreachable

sw.default:
  unreachable

; No branch in this function targets sw.epilog, so the only 'ret' has no
; predecessor.
sw.epilog:
  ret void
}

declare float @llvm.fabs.f32(float) nounwind readnone

; This broke the old AMDIL cfg structurizer
; FUNC-LABEL: {{^}}loop_land_info_assert:
; SI: s_cmp_gt_i32
; SI-NEXT: s_cbranch_scc0 [[ENDPGM:BB[0-9]+_[0-9]+]]

; SI: s_cmpk_gt_i32
; SI-NEXT: s_cbranch_scc1 [[ENDPGM]]

; SI: [[INFLOOP:BB[0-9]+_[0-9]+]]
; SI: s_branch [[INFLOOP]]

; SI: [[ENDPGM]]:
; SI: s_endpgm
; Kernel with nested loops (while.cond.outer / while.cond / for.cond), several
; exits to a common 'return' block, and an infinite self-loop. All branch
; conditions except %cmp2 are derived directly from kernel arguments; %cmp2
; comes from a float loaded through an undef pointer.
; %c2, %x and %y are not referenced in the body.
define amdgpu_kernel void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32 %c3, i32 %x, i32 %y, i1 %arg) nounwind {
entry:
  %cmp = icmp sgt i32 %c0, 0
  br label %while.cond.outer

; Outer loop header: reloads %tmp each time control returns here from if.end.
while.cond.outer:
  %tmp = load float, float addrspace(1)* undef
  br label %while.cond

; Inner loop header: re-entered from if.else.
while.cond:
  %cmp1 = icmp slt i32 %c1, 4
  br i1 %cmp1, label %convex.exit, label %for.cond

; Reached when %c1 < 4; returns if either %cmp or %cmp1 is true.
convex.exit:
  %or = or i1 %cmp, %cmp1
  br i1 %or, label %return, label %if.end

; Compares |%tmp| against 0x3E80000000000000 (2^-23): below it, continue the
; inner loop via if.else; otherwise go back to the outer loop header.
if.end:
  %tmp3 = call float @llvm.fabs.f32(float %tmp) nounwind readnone
  %cmp2 = fcmp olt float %tmp3, 0x3E80000000000000
  br i1 %cmp2, label %if.else, label %while.cond.outer

; Volatile store through an undef pointer, then back edge to while.cond.
if.else:
  store volatile i32 3, i32 addrspace(1)* undef, align 4
  br label %while.cond

; Second loop header: re-entered from if.end.2.
for.cond:
  %cmp3 = icmp slt i32 %c3, 1000
  br i1 %cmp3, label %for.body, label %return

; Branches on %cmp3 again, the same value that guarded entry to this block.
for.body:
  br i1 %cmp3, label %self.loop, label %if.end.2

; Returns if %cmp3 or %arg is true; otherwise back edge to for.cond.
if.end.2:
  %or.cond2 = or i1 %cmp3, %arg
  br i1 %or.cond2, label %return, label %for.cond

; Infinite loop: the block's only successor is itself.
self.loop:
 br label %self.loop

return:
  ret void
}


declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0

attributes #0 = { nounwind readnone }
R600/SI: Fix verifier error caused by SIAnnotateControlFlow This pass will always try to insert llvm.SI.ifbreak intrinsics in the same block that its conditional value is computed in. This is a problem when conditions for breaks or continue are computed outside of the loop, because the llvm.SI.ifbreak intrinsic ends up being inserted outside of the loop. This patch fixes this problem by inserting the llvm.SI.ifbreak intrinsics in the loop header when the condition is computed outside the loop. llvm-svn: 234891 2015-04-14 22:36:45 +08:00			`; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs \| FileCheck --check-prefix=SI --check-prefix=FUNC %s`
Enable FeatureFlatForGlobal on Volcanic Islands This switches to the workaround that HSA defaults to for the mesa path. This should be applied to the 4.0 branch. Patch by Vedran Miletić <vedran@miletic.net> llvm-svn: 292982 2017-01-25 06:02:15 +08:00			`; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs \| FileCheck --check-prefix=SI --check-prefix=FUNC %s`
R600/SI: Fix verifier error caused by SIAnnotateControlFlow This pass will always try to insert llvm.SI.ifbreak intrinsics in the same block that its conditional value is computed in. This is a problem when conditions for breaks or continue are computed outside of the loop, because the llvm.SI.ifbreak intrinsic ends up being inserted outside of the loop. This patch fixes this problem by inserting the llvm.SI.ifbreak intrinsics in the loop header when the condition is computed outside the loop. llvm-svn: 234891 2015-04-14 22:36:45 +08:00
			`; FUNC-LABEL: {{^}}break_inserted_outside_of_loop:`

			`; SI: [[LOOP_LABEL:[A-Z0-9]+]]:`
			`; Lowered break instructin:`
			`; SI: s_or_b64`
			`; Lowered Loop instruction:`
			`; SI: s_andn2_b64`
			`; s_cbranch_execnz [[LOOP_LABEL]]`
			`; SI: s_endpgm`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @break_inserted_outside_of_loop(i32 addrspace(1)* %out, i32 %a) {`
R600/SI: Fix verifier error caused by SIAnnotateControlFlow This pass will always try to insert llvm.SI.ifbreak intrinsics in the same block that its conditional value is computed in. This is a problem when conditions for breaks or continue are computed outside of the loop, because the llvm.SI.ifbreak intrinsic ends up being inserted outside of the loop. This patch fixes this problem by inserting the llvm.SI.ifbreak intrinsics in the loop header when the condition is computed outside the loop. llvm-svn: 234891 2015-04-14 22:36:45 +08:00			`main_body:`
AMDGPU/SI: Detect uniform branches and emit s_cbranch instructions Reviewers: arsenm Subscribers: mareko, MatzeB, qcolombet, arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D16603 llvm-svn: 260765 2016-02-13 07:45:29 +08:00			`%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0`
			`%0 = and i32 %a, %tid`
R600/SI: Fix verifier error caused by SIAnnotateControlFlow This pass will always try to insert llvm.SI.ifbreak intrinsics in the same block that its conditional value is computed in. This is a problem when conditions for breaks or continue are computed outside of the loop, because the llvm.SI.ifbreak intrinsic ends up being inserted outside of the loop. This patch fixes this problem by inserting the llvm.SI.ifbreak intrinsics in the loop header when the condition is computed outside the loop. llvm-svn: 234891 2015-04-14 22:36:45 +08:00			`%1 = trunc i32 %0 to i1`
			`br label %ENDIF`

			`ENDLOOP:`
			`store i32 0, i32 addrspace(1)* %out`
			`ret void`

			`ENDIF:`
			`br i1 %1, label %ENDLOOP, label %ENDIF`
			`}`
R600/SI: Fix verifier errors from the SIAnnotateControlFlow pass This pass was generating 'Instruction does not dominate all uses!' errors for programs which had loops with a condition variable that depended on the result of a phi instruction from outside of the loop. The pass was inserting new phi nodes outside of the loop which used values defined inside the loop. http://bugs.freedesktop.org/show_bug.cgi?id=90056 llvm-svn: 236306 2015-05-01 11:44:08 +08:00

			`; FUNC-LABEL: {{^}}phi_cond_outside_loop:`
			`; FIXME: This could be folded into the s_or_b64 instruction`
			`; SI: s_mov_b64 [[ZERO:s\[[0-9]+:[0-9]+\]]], 0`
			`; SI: [[LOOP_LABEL:[A-Z0-9]+]]`
AMDGPU: Use unsigned compare for eq/ne For some reason there are both of these available, except for scalar 64-bit compares which only has u64. I'm not sure why there are both (I'm guessing it's for the one bit inputs we don't use), but for consistency always using the unsigned one. llvm-svn: 282832 2016-09-30 09:50:20 +08:00			`; SI: v_cmp_ne_u32_e32 vcc, 0, v{{[0-9]+}}`
R600/SI: Fix verifier errors from the SIAnnotateControlFlow pass This pass was generating 'Instruction does not dominate all uses!' errors for programs which had loops with a condition variable that depended on the result of a phi instruction from outside of the loop. The pass was inserting new phi nodes outside of the loop which used values defined inside the loop. http://bugs.freedesktop.org/show_bug.cgi?id=90056 llvm-svn: 236306 2015-05-01 11:44:08 +08:00
			`; SI_IF_BREAK instruction:`
			`; SI: s_or_b64 [[BREAK:s\[[0-9]+:[0-9]+\]]], vcc, [[ZERO]]`

			`; SI_LOOP instruction:`
			`; SI: s_andn2_b64 exec, exec, [[BREAK]]`
			`; SI: s_cbranch_execnz [[LOOP_LABEL]]`
			`; SI: s_endpgm`

AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @phi_cond_outside_loop(i32 %b) {`
R600/SI: Fix verifier errors from the SIAnnotateControlFlow pass This pass was generating 'Instruction does not dominate all uses!' errors for programs which had loops with a condition variable that depended on the result of a phi instruction from outside of the loop. The pass was inserting new phi nodes outside of the loop which used values defined inside the loop. http://bugs.freedesktop.org/show_bug.cgi?id=90056 llvm-svn: 236306 2015-05-01 11:44:08 +08:00			`entry:`
AMDGPU/SI: Detect uniform branches and emit s_cbranch instructions Reviewers: arsenm Subscribers: mareko, MatzeB, qcolombet, arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D16603 llvm-svn: 260765 2016-02-13 07:45:29 +08:00			`%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0`
			`%0 = icmp eq i32 %tid , 0`
R600/SI: Fix verifier errors from the SIAnnotateControlFlow pass This pass was generating 'Instruction does not dominate all uses!' errors for programs which had loops with a condition variable that depended on the result of a phi instruction from outside of the loop. The pass was inserting new phi nodes outside of the loop which used values defined inside the loop. http://bugs.freedesktop.org/show_bug.cgi?id=90056 llvm-svn: 236306 2015-05-01 11:44:08 +08:00			`br i1 %0, label %if, label %else`

			`if:`
			`br label %endif`

			`else:`
			`%1 = icmp eq i32 %b, 0`
			`br label %endif`

			`endif:`
			`%2 = phi i1 [0, %if], [%1, %else]`
			`br label %loop`

			`loop:`
			`br i1 %2, label %exit, label %loop`

			`exit:`
			`ret void`
			`}`
AMDGPU/SI: Detect uniform branches and emit s_cbranch instructions Reviewers: arsenm Subscribers: mareko, MatzeB, qcolombet, arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D16603 llvm-svn: 260765 2016-02-13 07:45:29 +08:00
AMDGPU: Un-xfail and add tests Un XFAIL a few tests plus a few more I had lying around in my tree, which seem to all work now but I don't see tests that quite test the same things. llvm-svn: 273655 2016-06-24 14:58:01 +08:00			`; FIXME: should emit s_endpgm`
			`; CHECK-LABEL: {{^}}switch_unreachable:`
			`; CHECK-NOT: s_endpgm`
			`; CHECK: .Lfunc_end2`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @switch_unreachable(i32 addrspace(1)* %g, i8 addrspace(3)* %l, i32 %x) nounwind {`
AMDGPU: Un-xfail and add tests Un XFAIL a few tests plus a few more I had lying around in my tree, which seem to all work now but I don't see tests that quite test the same things. llvm-svn: 273655 2016-06-24 14:58:01 +08:00			`centry:`
			`switch i32 %x, label %sw.default [`
			`i32 0, label %sw.bb`
			`i32 60, label %sw.bb`
			`]`

			`sw.bb:`
			`unreachable`

			`sw.default:`
			`unreachable`

			`sw.epilog:`
			`ret void`
			`}`

			`declare float @llvm.fabs.f32(float) nounwind readnone`

			`; This broke the old AMDIL cfg structurizer`
			`; FUNC-LABEL: {{^}}loop_land_info_assert:`
			`; SI: s_cmp_gt_i32`
			`; SI-NEXT: s_cbranch_scc0 [[ENDPGM:BB[0-9]+_[0-9]+]]`

AMDGPU: Use SOPK compare instructions llvm-svn: 281780 2016-09-17 05:41:16 +08:00			`; SI: s_cmpk_gt_i32`
AMDGPU: Un-xfail and add tests Un XFAIL a few tests plus a few more I had lying around in my tree, which seem to all work now but I don't see tests that quite test the same things. llvm-svn: 273655 2016-06-24 14:58:01 +08:00			`; SI-NEXT: s_cbranch_scc1 [[ENDPGM]]`

			`; SI: [[INFLOOP:BB[0-9]+_[0-9]+]]`
			`; SI: s_branch [[INFLOOP]]`

			`; SI: [[ENDPGM]]:`
			`; SI: s_endpgm`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32 %c3, i32 %x, i32 %y, i1 %arg) nounwind {`
AMDGPU: Un-xfail and add tests Un XFAIL a few tests plus a few more I had lying around in my tree, which seem to all work now but I don't see tests that quite test the same things. llvm-svn: 273655 2016-06-24 14:58:01 +08:00			`entry:`
			`%cmp = icmp sgt i32 %c0, 0`
			`br label %while.cond.outer`

			`while.cond.outer:`
			`%tmp = load float, float addrspace(1)* undef`
			`br label %while.cond`

			`while.cond:`
			`%cmp1 = icmp slt i32 %c1, 4`
			`br i1 %cmp1, label %convex.exit, label %for.cond`

			`convex.exit:`
			`%or = or i1 %cmp, %cmp1`
			`br i1 %or, label %return, label %if.end`

			`if.end:`
			`%tmp3 = call float @llvm.fabs.f32(float %tmp) nounwind readnone`
			`%cmp2 = fcmp olt float %tmp3, 0x3E80000000000000`
			`br i1 %cmp2, label %if.else, label %while.cond.outer`

			`if.else:`
			`store volatile i32 3, i32 addrspace(1)* undef, align 4`
			`br label %while.cond`

			`for.cond:`
			`%cmp3 = icmp slt i32 %c3, 1000`
			`br i1 %cmp3, label %for.body, label %return`

			`for.body:`
			`br i1 %cmp3, label %self.loop, label %if.end.2`

			`if.end.2:`
			`%or.cond2 = or i1 %cmp3, %arg`
			`br i1 %or.cond2, label %return, label %for.cond`

			`self.loop:`
			`br label %self.loop`

			`return:`
			`ret void`
			`}`


AMDGPU/SI: Detect uniform branches and emit s_cbranch instructions Reviewers: arsenm Subscribers: mareko, MatzeB, qcolombet, arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D16603 llvm-svn: 260765 2016-02-13 07:45:29 +08:00			`declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0`

			`attributes #0 = { nounwind readnone }`