; Source: llvm-project/llvm/test/CodeGen/AMDGPU/control-flow-optnone.ll

; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; optnone disables AMDGPUAnnotateUniformValues, so no branch is known
; to be uniform during instruction selection. The custom selection for
; brcond was not checking if the branch was uniform, relying on the
; selection pattern to check that. That would fail, so then the branch
; would fail to select.

; GCN-LABEL: {{^}}copytoreg_divergent_brcond:
; GCN: s_branch

; GCN-DAG: v_cmp_lt_i32
; GCN-DAG: v_cmp_gt_i32
; GCN: s_and_b64
; GCN: s_mov_b64 exec

; GCN: s_or_b64 exec, exec
; GCN: s_cmp_eq_u32
; GCN: s_cbranch_scc1
; GCN-NEXT: s_branch
; Reproducer kernel. It is a loop nest with no exit (the function contains no
; `ret`) that holds two conditional branches: one whose condition is derived
; from the workitem id (bb9) and one whose condition depends only on a kernel
; argument (bb14). The IR is intentionally left in its reduced form; the
; checks before this function depend on it.
define amdgpu_kernel void @copytoreg_divergent_brcond(i32 %arg, i32 %arg1, i32 %arg2) #0 {
bb:
  ; Build %tmp7 from the workitem id, so the value is tied to the executing
  ; work item rather than to the kernel arguments alone.
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp3 = zext i32 %tmp to i64
  ; The addend is undef; presumably an artifact of test reduction.
  %tmp5 = add i64 %tmp3, undef
  %tmp6 = trunc i64 %tmp5 to i32
  %tmp7 = mul nsw i32 %tmp6, %arg2
  br label %bb8

; Back-edge block for the outer loop: reached from bb14, jumps to bb8.
bb8.loopexit:                                     ; preds = %bb14
  br label %bb8

; Outer loop header; falls straight through to the inner loop header.
bb8:                                              ; preds = %bb8.loopexit, %bb
  br label %bb9

; Inner loop header. The branch condition is the AND of a comparison on the
; workitem-id-derived %tmp7 and a comparison on the kernel argument %arg.
; This is the branch the function name calls the divergent brcond.
bb9:                                              ; preds = %bb14, %bb8
  %tmp10 = icmp slt i32 %tmp7, %arg1
  %tmp11 = icmp sgt i32 %arg, 0
  %tmp12 = and i1 %tmp10, %tmp11
  br i1 %tmp12, label %bb13, label %bb14

; Conditionally executed block. The store is volatile and targets an undef
; pointer in address space 1, giving the block a side effect.
bb13:                                             ; preds = %bb9
  store volatile i32 0, i32 addrspace(1)* undef, align 4
  br label %bb14

; Join point and loop latch. The condition involves only the kernel argument
; %arg2. Both successors lead back into the loop: bb8.loopexit (outer back
; edge) when %arg2 == 1, otherwise bb9 (inner back edge).
bb14:                                             ; preds = %bb13, %bb9
  %tmp15 = icmp eq i32 %arg2, 1
  br i1 %tmp15, label %bb8.loopexit, label %bb9
}

; AMDGPU intrinsic used in the kernel's entry block to obtain the workitem id
; (x component, going by its name).
declare i32 @llvm.amdgcn.workitem.id.x() #1

; #0 is attached to the kernel. `optnone` is the attribute this test is about
; (see the explanatory comment at the top of the file); LangRef requires
; `noinline` to accompany `optnone`.
attributes #0 = { nounwind optnone noinline }
; #1 is attached to the workitem-id intrinsic declaration.
attributes #1 = { nounwind readnone speculatable }

; Revision history of this test, recovered from the blame view. The verbatim
; copy of the test body that accompanied it duplicated the lines above and is
; not repeated here.
;
; - llvm-svn: 315360 (2017-10-11 04:34:49 +08:00)
;   AMDGPU: Fix failure to select branch with optnone
;   opt-bisect/optnone disable the AMDGPUUniformAnnotateValues pass. The
;   heuristic in the custom selector for brcond deferred the branch
;   uniformity check to the pattern, which would fail.
;   Per the blame view, every line not listed under a later revision below
;   dates from this commit.
;
; - llvm-svn: 315361 (2017-10-11 04:48:36 +08:00)
;   AMDGPU: Fix missing skipFunction calls
;   Per the blame view, last touched the `s_mov_b64 exec` check.
;
; - llvm-svn: 368042 (2019-08-06 22:30:19 +08:00)
;   [StructurizeCFG] Enable -structurizecfg-relaxed-uniform-regions by default
;   D62198 introduced an option to relax the checks for
;   hasOnlyUniformBranches. This commit turns the option on by default, for
;   better code generation in some cases in AMDGPU.
;   Differential Revision: https://reviews.llvm.org/D63198
;   Change-Id: I9cbff002a1e74d3b7eb96b4192dc8129936d537d
;   Per the blame view, last touched the `s_cmp_eq_u32` and `s_cbranch_scc1`
;   checks.