llvm-project/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-co...

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s

; This module creates a divergent branch. The branch is marked as divergent by
; the divergence analysis but its condition is not. This test ensures that the
; divergence of the branch itself is tested, not that of its condition, so that
; the branch is correctly emitted as divergent.

; Triple components: amdgcn architecture, mesa vendor, mesa3d OS.
target triple = "amdgcn-mesa-mesa3d"

; Entry point using the amdgpu_ps calling convention. It takes two unnamed
; arguments, %0 (i32) and %1 (float), and returns nothing. The autogenerated
; FileCheck assertions that follow pin the exact llc output for this function;
; regenerate them with utils/update_llc_test_checks.py rather than by hand.
define amdgpu_ps void @main(i32, float) {
; CHECK-LABEL: main:
; CHECK:       ; %bb.0: ; %start
; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
; CHECK-NEXT:    s_mov_b32 m0, s0
; CHECK-NEXT:    s_mov_b32 s0, 0
; CHECK-NEXT:    v_interp_p1_f32_e32 v0, v1, attr0.x
; CHECK-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v0
; CHECK-NEXT:    s_mov_b64 s[2:3], 0
; CHECK-NEXT:    ; implicit-def: $sgpr6_sgpr7
; CHECK-NEXT:    ; implicit-def: $sgpr4_sgpr5
; CHECK-NEXT:    s_branch BB0_3
; CHECK-NEXT:  BB0_1: ; %Flow1
; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT:    s_or_b64 exec, exec, s[8:9]
; CHECK-NEXT:    s_mov_b64 s[8:9], 0
; CHECK-NEXT:  BB0_2: ; %Flow
; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT:    s_and_b64 s[10:11], exec, s[6:7]
; CHECK-NEXT:    s_or_b64 s[2:3], s[10:11], s[2:3]
; CHECK-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; CHECK-NEXT:    s_and_b64 s[8:9], s[8:9], exec
; CHECK-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; CHECK-NEXT:    s_andn2_b64 exec, exec, s[2:3]
; CHECK-NEXT:    s_cbranch_execz BB0_6
; CHECK-NEXT:  BB0_3: ; %loop
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    s_or_b64 s[6:7], s[6:7], exec
; CHECK-NEXT:    s_cmp_lt_u32 s0, 32
; CHECK-NEXT:    s_mov_b64 s[8:9], -1
; CHECK-NEXT:    s_cbranch_scc0 BB0_2
; CHECK-NEXT:  ; %bb.4: ; %endif1
; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT:    s_mov_b64 s[6:7], -1
; CHECK-NEXT:    s_and_saveexec_b64 s[8:9], vcc
; CHECK-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
; CHECK-NEXT:    s_cbranch_execz BB0_1
; CHECK-NEXT:  ; %bb.5: ; %endif2
; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT:    s_add_i32 s0, s0, 1
; CHECK-NEXT:    s_xor_b64 s[6:7], exec, -1
; CHECK-NEXT:    s_branch BB0_1
; CHECK-NEXT:  BB0_6: ; %Flow2
; CHECK-NEXT:    s_or_b64 exec, exec, s[2:3]
; CHECK-NEXT:    v_mov_b32_e32 v1, 0
; CHECK-NEXT:    s_and_saveexec_b64 s[0:1], s[4:5]
; CHECK-NEXT:  ; %bb.7: ; %if1
; CHECK-NEXT:    v_sqrt_f32_e32 v1, v0
; CHECK-NEXT:  ; %bb.8: ; %endloop
; CHECK-NEXT:    s_or_b64 exec, exec, s[0:1]
; CHECK-NEXT:    exp mrt0 v1, v1, v1, v1 done vm
; CHECK-NEXT:    s_endpgm

; this is the divergent branch with the condition not marked as divergent
; NOTE(review): this remark sits directly above %start, whose only terminator
; is an unconditional branch. It presumably refers to the conditional branch on
; %v2 in %loop (the one whose condition depends only on the counter) -- confirm.
start:
  ; %v0 is produced by the interp.p1 intrinsic from both function arguments
  ; (%1 as the float operand, %0 as the trailing i32 operand).
  %v0 = call float @llvm.amdgcn.interp.p1(float %1, i32 0, i32 0, i32 %0)
  br label %loop

loop:
  ; Loop counter: 0 when entered from %start, %v5 (= %v1 + 1) from %endif2.
  %v1 = phi i32 [ 0, %start ], [ %v5, %endif2 ]
  ; Take the %if1 exit once the counter is greater than 31 (unsigned compare);
  ; otherwise continue with %endif1.
  %v2 = icmp ugt i32 %v1, 31
  br i1 %v2, label %if1, label %endif1

if1:
  ; Loop exit reached through the counter test: the exported value is sqrt(%v0).
  %v3 = call float @llvm.sqrt.f32(float %v0)
  br label %endloop

endif1:
  ; Second loop exit: leave for %endloop when %v0 > 0.0 (ordered compare),
  ; otherwise go on to %endif2. This condition depends on %v0, not the counter.
  %v4 = fcmp ogt float %v0, 0.000000e+00
  br i1 %v4, label %endloop, label %endif2

endif2:
  ; Increment the counter and take the back edge to %loop.
  %v5 = add i32 %v1, 1
  br label %loop

endloop:
  ; %v6 is %v3 when arriving from %if1 and 0.0 when arriving from %endif1.
  %v6 = phi float [ %v3, %if1 ], [ 0.0, %endif1 ]
  ; %v6 is passed as all four float operands of the export intrinsic; the
  ; expected output above shows this call as `exp mrt0 v1, v1, v1, v1 done vm`.
  call void @llvm.amdgcn.exp.v4f32(i32 0, i32 15, float %v6, float %v6, float %v6, float %v6, i1 true, i1 true)
  ret void
}

; Intrinsics called by @main. The trailing #N on each declaration refers to one
; of the attribute groups defined after the declarations.
declare float @llvm.sqrt.f32(float) #1
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
declare void @llvm.amdgcn.exp.v4f32(i32, i32, float, float, float, float, i1, i1) #0

; #0: nounwind only; applied to the export intrinsic.
; #1: nounwind and readnone; applied to the sqrt and interp.p1 intrinsics.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
AMDGPU: test for uniformity of branch instruction, not its condition Summary: If a divergent branch instruction is marked as divergent by propagation rule 2 in DivergencePropagator::exploreSyncDependency() and its condition is uniform, that branch would incorrectly be assumed to be uniform. Reviewers: arsenm, tstellar Reviewed By: arsenm Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, llvm-commits Differential Revision: https://reviews.llvm.org/D56331 llvm-svn: 350532 2019-01-07 23:52:28 +08:00			`; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py`
			`; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s \| FileCheck %s`

			`; This module creates a divergent branch. The branch is marked as divergent by`
			`; the divergence analysis but the condition is not. This test ensures that the`
			`; divergence of the branch is tested, not its condition, so that branch is`
			`; correctly emitted as divergent.`

			`target triple = "amdgcn-mesa-mesa3d"`

			`define amdgpu_ps void @main(i32, float) {`
			`; CHECK-LABEL: main:`
			`; CHECK: ; %bb.0: ; %start`
			`; CHECK-NEXT: v_readfirstlane_b32 s0, v0`
			`; CHECK-NEXT: s_mov_b32 m0, s0`
[AMDGPU] Come back patch for the 'Assign register class for cross block values according to the divergence.' Detailed description: After https://reviews.llvm.org/D59990 submit several issues were discovered. Changes in common code were preserved but AMDGPU specific part was reverted to keep the backend working correctly. Discovered issues were addressed in the following commits: https://reviews.llvm.org/D67662 https://reviews.llvm.org/D67101 https://reviews.llvm.org/D63953 https://reviews.llvm.org/D63731 This change brings back AMDGPU specific changes. Reviewed by: rampitec, arsenm Differential Revision: https://reviews.llvm.org/D68635 llvm-svn: 374767 2019-10-14 20:01:10 +08:00			`; CHECK-NEXT: s_mov_b32 s0, 0`
AMDGPU: test for uniformity of branch instruction, not its condition Summary: If a divergent branch instruction is marked as divergent by propagation rule 2 in DivergencePropagator::exploreSyncDependency() and its condition is uniform, that branch would incorrectly be assumed to be uniform. Reviewers: arsenm, tstellar Reviewed By: arsenm Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, llvm-commits Differential Revision: https://reviews.llvm.org/D56331 llvm-svn: 350532 2019-01-07 23:52:28 +08:00			`; CHECK-NEXT: v_interp_p1_f32_e32 v0, v1, attr0.x`
[AMDGPU] Come back patch for the 'Assign register class for cross block values according to the divergence.' Detailed description: After https://reviews.llvm.org/D59990 submit several issues were discovered. Changes in common code were preserved but AMDGPU specific part was reverted to keep the backend working correctly. Discovered issues were addressed in the following commits: https://reviews.llvm.org/D67662 https://reviews.llvm.org/D67101 https://reviews.llvm.org/D63953 https://reviews.llvm.org/D63731 This change brings back AMDGPU specific changes. Reviewed by: rampitec, arsenm Differential Revision: https://reviews.llvm.org/D68635 llvm-svn: 374767 2019-10-14 20:01:10 +08:00			`; CHECK-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0`
[AMDGPU] Fix emitIfBreak CF lowering: use temp reg to make register coalescer life easier. Differential revision: https://reviews.llvm.org/D70405 2019-11-19 01:06:48 +08:00			`; CHECK-NEXT: s_mov_b64 s[2:3], 0`
[AMDGPU] Partial revert for the ba447bae7448435c9986eece0811da1423972fdd "Divergence driven ISel. Assign register class for cross block values according to the divergence." that discovered the design flaw leading to several issues that required to be solved before. This change reverts AMDGPU specific changes and keeps common part unaffected. llvm-svn: 362749 2019-06-07 05:13:02 +08:00			`; CHECK-NEXT: ; implicit-def: $sgpr6_sgpr7`
[AMDGPU] Fix emitIfBreak CF lowering: use temp reg to make register coalescer life easier. Differential revision: https://reviews.llvm.org/D70405 2019-11-19 01:06:48 +08:00			`; CHECK-NEXT: ; implicit-def: $sgpr4_sgpr5`
Revert [MBP] Disable aggressive loop rotate in plain mode This reverts r369664 (git commit 51f48295cbe8fa3a44db263b528dd9f7bae7bf9a) It causes many benchmark regressions, internally and in llvm's benchmark suite. llvm-svn: 370398 2019-08-30 03:03:58 +08:00			`; CHECK-NEXT: s_branch BB0_3`
			`; CHECK-NEXT: BB0_1: ; %Flow1`
			`; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1`
			`; CHECK-NEXT: s_or_b64 exec, exec, s[8:9]`
[AMDGPU] Fix emitIfBreak CF lowering: use temp reg to make register coalescer life easier. Differential revision: https://reviews.llvm.org/D70405 2019-11-19 01:06:48 +08:00			`; CHECK-NEXT: s_mov_b64 s[8:9], 0`
Revert [MBP] Disable aggressive loop rotate in plain mode This reverts r369664 (git commit 51f48295cbe8fa3a44db263b528dd9f7bae7bf9a) It causes many benchmark regressions, internally and in llvm's benchmark suite. llvm-svn: 370398 2019-08-30 03:03:58 +08:00			`; CHECK-NEXT: BB0_2: ; %Flow`
			`; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1`
[AMDGPU] Fix emitIfBreak CF lowering: use temp reg to make register coalescer life easier. Differential revision: https://reviews.llvm.org/D70405 2019-11-19 01:06:48 +08:00			`; CHECK-NEXT: s_and_b64 s[10:11], exec, s[6:7]`
			`; CHECK-NEXT: s_or_b64 s[2:3], s[10:11], s[2:3]`
			`; CHECK-NEXT: s_andn2_b64 s[4:5], s[4:5], exec`
			`; CHECK-NEXT: s_and_b64 s[8:9], s[8:9], exec`
			`; CHECK-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]`
			`; CHECK-NEXT: s_andn2_b64 exec, exec, s[2:3]`
[AMDGPU] SIRemoveShortExecBranches should not remove branches exiting loops Summary: Check that a s_cbranch_execz is not a loop exit before removing it. As the pass is generating infinite loops. Reviewers: cdevadas, arsenm, nhaehnle Reviewed By: nhaehnle Subscribers: kzhuravl, jvesely, wdng, yaxunl, tpr, t-tye, hiraditya, kerbowa, llvm-commits, dstuttard, foad Tags: #llvm Differential Revision: https://reviews.llvm.org/D72997 2020-01-22 12:11:29 +08:00			`; CHECK-NEXT: s_cbranch_execz BB0_6`
Revert [MBP] Disable aggressive loop rotate in plain mode This reverts r369664 (git commit 51f48295cbe8fa3a44db263b528dd9f7bae7bf9a) It causes many benchmark regressions, internally and in llvm's benchmark suite. llvm-svn: 370398 2019-08-30 03:03:58 +08:00			`; CHECK-NEXT: BB0_3: ; %loop`
AMDGPU: test for uniformity of branch instruction, not its condition Summary: If a divergent branch instruction is marked as divergent by propagation rule 2 in DivergencePropagator::exploreSyncDependency() and its condition is uniform, that branch would incorrectly be assumed to be uniform. Reviewers: arsenm, tstellar Reviewed By: arsenm Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, llvm-commits Differential Revision: https://reviews.llvm.org/D56331 llvm-svn: 350532 2019-01-07 23:52:28 +08:00			`; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1`
[AMDGPU] Partial revert for the ba447bae7448435c9986eece0811da1423972fdd "Divergence driven ISel. Assign register class for cross block values according to the divergence." that discovered the design flaw leading to several issues that required to be solved before. This change reverts AMDGPU specific changes and keeps common part unaffected. llvm-svn: 362749 2019-06-07 05:13:02 +08:00			`; CHECK-NEXT: s_or_b64 s[6:7], s[6:7], exec`
[AMDGPU] Come back patch for the 'Assign register class for cross block values according to the divergence.' Detailed description: After https://reviews.llvm.org/D59990 submit several issues were discovered. Changes in common code were preserved but AMDGPU specific part was reverted to keep the backend working correctly. Discovered issues were addressed in the following commits: https://reviews.llvm.org/D67662 https://reviews.llvm.org/D67101 https://reviews.llvm.org/D63953 https://reviews.llvm.org/D63731 This change brings back AMDGPU specific changes. Reviewed by: rampitec, arsenm Differential Revision: https://reviews.llvm.org/D68635 llvm-svn: 374767 2019-10-14 20:01:10 +08:00			`; CHECK-NEXT: s_cmp_lt_u32 s0, 32`
[AMDGPU] Fix emitIfBreak CF lowering: use temp reg to make register coalescer life easier. Differential revision: https://reviews.llvm.org/D70405 2019-11-19 01:06:48 +08:00			`; CHECK-NEXT: s_mov_b64 s[8:9], -1`
[AMDGPU] Come back patch for the 'Assign register class for cross block values according to the divergence.' Detailed description: After https://reviews.llvm.org/D59990 submit several issues were discovered. Changes in common code were preserved but AMDGPU specific part was reverted to keep the backend working correctly. Discovered issues were addressed in the following commits: https://reviews.llvm.org/D67662 https://reviews.llvm.org/D67101 https://reviews.llvm.org/D63953 https://reviews.llvm.org/D63731 This change brings back AMDGPU specific changes. Reviewed by: rampitec, arsenm Differential Revision: https://reviews.llvm.org/D68635 llvm-svn: 374767 2019-10-14 20:01:10 +08:00			`; CHECK-NEXT: s_cbranch_scc0 BB0_2`
Revert [MBP] Disable aggressive loop rotate in plain mode This reverts r369664 (git commit 51f48295cbe8fa3a44db263b528dd9f7bae7bf9a) It causes many benchmark regressions, internally and in llvm's benchmark suite. llvm-svn: 370398 2019-08-30 03:03:58 +08:00			`; CHECK-NEXT: ; %bb.4: ; %endif1`
			`; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1`
[AMDGPU] Partial revert for the ba447bae7448435c9986eece0811da1423972fdd "Divergence driven ISel. Assign register class for cross block values according to the divergence." that discovered the design flaw leading to several issues that required to be solved before. This change reverts AMDGPU specific changes and keeps common part unaffected. llvm-svn: 362749 2019-06-07 05:13:02 +08:00			`; CHECK-NEXT: s_mov_b64 s[6:7], -1`
[AMDGPU] Come back patch for the 'Assign register class for cross block values according to the divergence.' Detailed description: After https://reviews.llvm.org/D59990 submit several issues were discovered. Changes in common code were preserved but AMDGPU specific part was reverted to keep the backend working correctly. Discovered issues were addressed in the following commits: https://reviews.llvm.org/D67662 https://reviews.llvm.org/D67101 https://reviews.llvm.org/D63953 https://reviews.llvm.org/D63731 This change brings back AMDGPU specific changes. Reviewed by: rampitec, arsenm Differential Revision: https://reviews.llvm.org/D68635 llvm-svn: 374767 2019-10-14 20:01:10 +08:00			`; CHECK-NEXT: s_and_saveexec_b64 s[8:9], vcc`
[AMDGPU] Partial revert for the ba447bae7448435c9986eece0811da1423972fdd "Divergence driven ISel. Assign register class for cross block values according to the divergence." that discovered the design flaw leading to several issues that required to be solved before. This change reverts AMDGPU specific changes and keeps common part unaffected. llvm-svn: 362749 2019-06-07 05:13:02 +08:00			`; CHECK-NEXT: s_xor_b64 s[8:9], exec, s[8:9]`
Revert [MBP] Disable aggressive loop rotate in plain mode This reverts r369664 (git commit 51f48295cbe8fa3a44db263b528dd9f7bae7bf9a) It causes many benchmark regressions, internally and in llvm's benchmark suite. llvm-svn: 370398 2019-08-30 03:03:58 +08:00			`; CHECK-NEXT: s_cbranch_execz BB0_1`
Resubmit: [AMDGPU] Invert the handling of skip insertion. The current implementation of skip insertion (SIInsertSkip) makes it a mandatory pass required for correctness. Initially, the idea was to have an optional pass. This patch inserts the s_cbranch_execz upfront during SILowerControlFlow to skip over the sections of code when no lanes are active. Later, SIRemoveShortExecBranches removes the skips for short branches, unless there is a sideeffect and the skip branch is really necessary. This new pass will replace the handling of skip insertion in the existing SIInsertSkip Pass. Differential revision: https://reviews.llvm.org/D68092 2020-01-22 12:07:55 +08:00			`; CHECK-NEXT: ; %bb.5: ; %endif2`
Revert [MBP] Disable aggressive loop rotate in plain mode This reverts r369664 (git commit 51f48295cbe8fa3a44db263b528dd9f7bae7bf9a) It causes many benchmark regressions, internally and in llvm's benchmark suite. llvm-svn: 370398 2019-08-30 03:03:58 +08:00			`; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1`
[AMDGPU] Come back patch for the 'Assign register class for cross block values according to the divergence.' Detailed description: After https://reviews.llvm.org/D59990 submit several issues were discovered. Changes in common code were preserved but AMDGPU specific part was reverted to keep the backend working correctly. Discovered issues were addressed in the following commits: https://reviews.llvm.org/D67662 https://reviews.llvm.org/D67101 https://reviews.llvm.org/D63953 https://reviews.llvm.org/D63731 This change brings back AMDGPU specific changes. Reviewed by: rampitec, arsenm Differential Revision: https://reviews.llvm.org/D68635 llvm-svn: 374767 2019-10-14 20:01:10 +08:00			`; CHECK-NEXT: s_add_i32 s0, s0, 1`
[AMDGPU] Partial revert for the ba447bae7448435c9986eece0811da1423972fdd "Divergence driven ISel. Assign register class for cross block values according to the divergence." that discovered the design flaw leading to several issues that required to be solved before. This change reverts AMDGPU specific changes and keeps common part unaffected. llvm-svn: 362749 2019-06-07 05:13:02 +08:00			`; CHECK-NEXT: s_xor_b64 s[6:7], exec, -1`
Revert [MBP] Disable aggressive loop rotate in plain mode This reverts r369664 (git commit 51f48295cbe8fa3a44db263b528dd9f7bae7bf9a) It causes many benchmark regressions, internally and in llvm's benchmark suite. llvm-svn: 370398 2019-08-30 03:03:58 +08:00			`; CHECK-NEXT: s_branch BB0_1`
[AMDGPU] SIRemoveShortExecBranches should not remove branches exiting loops Summary: Check that a s_cbranch_execz is not a loop exit before removing it. As the pass is generating infinite loops. Reviewers: cdevadas, arsenm, nhaehnle Reviewed By: nhaehnle Subscribers: kzhuravl, jvesely, wdng, yaxunl, tpr, t-tye, hiraditya, kerbowa, llvm-commits, dstuttard, foad Tags: #llvm Differential Revision: https://reviews.llvm.org/D72997 2020-01-22 12:11:29 +08:00			`; CHECK-NEXT: BB0_6: ; %Flow2`
[AMDGPU] Fix emitIfBreak CF lowering: use temp reg to make register coalescer life easier. Differential revision: https://reviews.llvm.org/D70405 2019-11-19 01:06:48 +08:00			`; CHECK-NEXT: s_or_b64 exec, exec, s[2:3]`
AMDGPU: test for uniformity of branch instruction, not its condition Summary: If a divergent branch instruction is marked as divergent by propagation rule 2 in DivergencePropagator::exploreSyncDependency() and its condition is uniform, that branch would incorrectly be assumed to be uniform. Reviewers: arsenm, tstellar Reviewed By: arsenm Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, llvm-commits Differential Revision: https://reviews.llvm.org/D56331 llvm-svn: 350532 2019-01-07 23:52:28 +08:00			`; CHECK-NEXT: v_mov_b32_e32 v1, 0`
[AMDGPU] Fix emitIfBreak CF lowering: use temp reg to make register coalescer life easier. Differential revision: https://reviews.llvm.org/D70405 2019-11-19 01:06:48 +08:00			`; CHECK-NEXT: s_and_saveexec_b64 s[0:1], s[4:5]`
Resubmit: [AMDGPU] Invert the handling of skip insertion. The current implementation of skip insertion (SIInsertSkip) makes it a mandatory pass required for correctness. Initially, the idea was to have an optional pass. This patch inserts the s_cbranch_execz upfront during SILowerControlFlow to skip over the sections of code when no lanes are active. Later, SIRemoveShortExecBranches removes the skips for short branches, unless there is a sideeffect and the skip branch is really necessary. This new pass will replace the handling of skip insertion in the existing SIInsertSkip Pass. Differential revision: https://reviews.llvm.org/D68092 2020-01-22 12:07:55 +08:00			`; CHECK-NEXT: ; %bb.7: ; %if1`
AMDGPU: test for uniformity of branch instruction, not its condition Summary: If a divergent branch instruction is marked as divergent by propagation rule 2 in DivergencePropagator::exploreSyncDependency() and its condition is uniform, that branch would incorrectly be assumed to be uniform. Reviewers: arsenm, tstellar Reviewed By: arsenm Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, llvm-commits Differential Revision: https://reviews.llvm.org/D56331 llvm-svn: 350532 2019-01-07 23:52:28 +08:00			`; CHECK-NEXT: v_sqrt_f32_e32 v1, v0`
Resubmit: [AMDGPU] Invert the handling of skip insertion. The current implementation of skip insertion (SIInsertSkip) makes it a mandatory pass required for correctness. Initially, the idea was to have an optional pass. This patch inserts the s_cbranch_execz upfront during SILowerControlFlow to skip over the sections of code when no lanes are active. Later, SIRemoveShortExecBranches removes the skips for short branches, unless there is a sideeffect and the skip branch is really necessary. This new pass will replace the handling of skip insertion in the existing SIInsertSkip Pass. Differential revision: https://reviews.llvm.org/D68092 2020-01-22 12:07:55 +08:00			`; CHECK-NEXT: ; %bb.8: ; %endloop`
AMDGPU: test for uniformity of branch instruction, not its condition Summary: If a divergent branch instruction is marked as divergent by propagation rule 2 in DivergencePropagator::exploreSyncDependency() and its condition is uniform, that branch would incorrectly be assumed to be uniform. Reviewers: arsenm, tstellar Reviewed By: arsenm Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, llvm-commits Differential Revision: https://reviews.llvm.org/D56331 llvm-svn: 350532 2019-01-07 23:52:28 +08:00			`; CHECK-NEXT: s_or_b64 exec, exec, s[0:1]`
			`; CHECK-NEXT: exp mrt0 v1, v1, v1, v1 done vm`
			`; CHECK-NEXT: s_endpgm`
[AMDGPU] Fix emitIfBreak CF lowering: use temp reg to make register coalescer life easier. Differential revision: https://reviews.llvm.org/D70405 2019-11-19 01:06:48 +08:00
Revert [MBP] Disable aggressive loop rotate in plain mode This reverts r369664 (git commit 51f48295cbe8fa3a44db263b528dd9f7bae7bf9a) It causes many benchmark regressions, internally and in llvm's benchmark suite. llvm-svn: 370398 2019-08-30 03:03:58 +08:00			`; this is the divergent branch with the condition not marked as divergent`
AMDGPU: test for uniformity of branch instruction, not its condition Summary: If a divergent branch instruction is marked as divergent by propagation rule 2 in DivergencePropagator::exploreSyncDependency() and its condition is uniform, that branch would incorrectly be assumed to be uniform. Reviewers: arsenm, tstellar Reviewed By: arsenm Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, llvm-commits Differential Revision: https://reviews.llvm.org/D56331 llvm-svn: 350532 2019-01-07 23:52:28 +08:00			`start:`
			`%v0 = call float @llvm.amdgcn.interp.p1(float %1, i32 0, i32 0, i32 %0)`
			`br label %loop`

			`loop:`
			`%v1 = phi i32 [ 0, %start ], [ %v5, %endif2 ]`
			`%v2 = icmp ugt i32 %v1, 31`
			`br i1 %v2, label %if1, label %endif1`

			`if1:`
			`%v3 = call float @llvm.sqrt.f32(float %v0)`
			`br label %endloop`

			`endif1:`
			`%v4 = fcmp ogt float %v0, 0.000000e+00`
			`br i1 %v4, label %endloop, label %endif2`

			`endif2:`
			`%v5 = add i32 %v1, 1`
			`br label %loop`

			`endloop:`
			`%v6 = phi float [ %v3, %if1 ], [ 0.0, %endif1 ]`
			`call void @llvm.amdgcn.exp.v4f32(i32 0, i32 15, float %v6, float %v6, float %v6, float %v6, i1 true, i1 true)`
			`ret void`
			`}`

			`declare float @llvm.sqrt.f32(float) #1`
			`declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1`
			`declare void @llvm.amdgcn.exp.v4f32(i32, i32, float, float, float, float, i1, i1) #0`

			`attributes #0 = { nounwind }`
			`attributes #1 = { nounwind readnone }`