llvm-project/llvm/test/CodeGen/AMDGPU/update-phi.ll

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -mtriple=amdgcn-- -S -amdgpu-unify-divergent-exit-nodes -verify %s | FileCheck -check-prefix=IR %s

; Make sure that the phi in n28 is updated when n28 is split by the
; -amdgpu-unify-divergent-exit-nodes pass.

define amdgpu_ps void @_amdgpu_ps_main() local_unnamed_addr #3 {
; IR-LABEL: @_amdgpu_ps_main(
; IR-NEXT:  .entry:
; IR-NEXT:    br label [[DOTLOOPEXIT:%.*]]
; IR:       .loopexit:
; IR-NEXT:    br label [[N28:%.*]]
; IR:       n28:
; IR-NEXT:    [[DOT01:%.*]] = phi float [ 0.000000e+00, [[DOTLOOPEXIT]] ], [ [[N29:%.*]], [[TRANSITIONBLOCK:%.*]] ]
; IR-NEXT:    [[N29]] = fadd float [[DOT01]], 1.000000e+00
; IR-NEXT:    [[N30:%.*]] = fcmp ogt float [[N29]], 4.000000e+00
; IR-NEXT:    br i1 true, label [[TRANSITIONBLOCK]], label [[UNIFIEDRETURNBLOCK:%.*]]
; IR:       TransitionBlock:
; IR-NEXT:    br i1 [[N30]], label [[DOTLOOPEXIT]], label [[N28]]
; IR:       UnifiedReturnBlock:
; IR-NEXT:    call void @llvm.amdgcn.exp.f32(i32 9, i32 0, float undef, float undef, float undef, float undef, i1 true, i1 true)
; IR-NEXT:    ret void
;
; Input CFG: .entry -> .loopexit -> n28, and n28 branches either back to
; .loopexit or to itself, so none of these blocks reaches a return. The only
; `ret` in the input is in n31, which has no predecessors.
;
; Expected output, as pinned by the checks above: n28 ends in a `br i1 true`
; whose targets are TransitionBlock and UnifiedReturnBlock, the original
; conditional branch lives in TransitionBlock, and the phi's back-edge
; incoming block is TransitionBlock rather than n28.
.entry:
  br label %.loopexit

.loopexit:                                        ; preds = %n28, %.entry
  br label %n28

n28:                                               ; preds = %.loopexit, %n28
  ; Loop-carried value: 0.0 when entered from .loopexit, otherwise the %n29
  ; computed on the previous trip through n28. The second incoming block of
  ; this phi is the one the test expects to be rewritten.
  %.01 = phi float [ 0.000000e+00, %.loopexit ], [ %n29, %n28 ]
  %n29 = fadd float %.01, 1.0
  %n30 = fcmp ogt float %n29, 4.000000e+00
  ; Both successors stay inside the loop; the checks expect this branch to be
  ; moved into TransitionBlock.
  br i1 %n30, label %.loopexit, label %n28

; No predecessors: nothing in this function branches here, but it holds the
; function's only return instruction.
n31:                                               ; preds =
  ret void
}
Update phis in AMDGPUUnifyDivergentExitNodes Original patch https://reviews.llvm.org/D63659 from Steven Perron <stevenperron@google.com> The pass AMDGPUUnifyDivergentExitNodes does not update the phi nodes in the successors of blocks that is splits. This is fixed by calling BasicBlock::splitBasicBlock to split the block instead of doing it manually. This does extra work because a new conditional branch is created in BB which is immediately replaced, but I think the simplicity is worth it. It also helps make the code more future proof in case other things need to be updated. llvm-svn: 364342 2019-06-26 02:55:16 +08:00			`; NOTE: Assertions have been autogenerated by utils/update_test_checks.py`
			`; RUN: opt -mtriple=amdgcn-- -S -amdgpu-unify-divergent-exit-nodes -verify %s | FileCheck -check-prefix=IR %s`

			`; Make sure that the phi in n28 is updated when the block is split by unify`
			`; divergent exit nodes.`

			`define amdgpu_ps void @_amdgpu_ps_main() local_unnamed_addr #3 {`
			`; IR-LABEL: @_amdgpu_ps_main(`
			`; IR-NEXT: .entry:`
			`; IR-NEXT: br label [[DOTLOOPEXIT:%.*]]`
			`; IR: .loopexit:`
			`; IR-NEXT: br label [[N28:%.*]]`
			`; IR: n28:`
			`; IR-NEXT: [[DOT01:%.*]] = phi float [ 0.000000e+00, [[DOTLOOPEXIT]] ], [ [[N29:%.*]], [[TRANSITIONBLOCK:%.*]] ]`
			`; IR-NEXT: [[N29]] = fadd float [[DOT01]], 1.000000e+00`
			`; IR-NEXT: [[N30:%.*]] = fcmp ogt float [[N29]], 4.000000e+00`
AMDGPU: Fix AMDGPUUnifyDivergentExitNodes with no normal returns Summary: The code was assuming in a few places that if there was only one exit from the function that it was a normal return, which is invalid. It could be an infinite loop, in which case we still need to insert the usual fake edge so that the null export happens. This fixes shaders that end with an infinite loop that discards. Reviewers: arsenm, nhaehnle, critson Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D71192 2019-12-09 19:04:00 +08:00			`; IR-NEXT: br i1 true, label [[TRANSITIONBLOCK]], label [[UNIFIEDRETURNBLOCK:%.*]]`
Update phis in AMDGPUUnifyDivergentExitNodes Original patch https://reviews.llvm.org/D63659 from Steven Perron <stevenperron@google.com> The pass AMDGPUUnifyDivergentExitNodes does not update the phi nodes in the successors of blocks that is splits. This is fixed by calling BasicBlock::splitBasicBlock to split the block instead of doing it manually. This does extra work because a new conditional branch is created in BB which is immediately replaced, but I think the simplicity is worth it. It also helps make the code more future proof in case other things need to be updated. llvm-svn: 364342 2019-06-26 02:55:16 +08:00			`; IR: TransitionBlock:`
			`; IR-NEXT: br i1 [[N30]], label [[DOTLOOPEXIT]], label [[N28]]`
AMDGPU: Fix AMDGPUUnifyDivergentExitNodes with no normal returns Summary: The code was assuming in a few places that if there was only one exit from the function that it was a normal return, which is invalid. It could be an infinite loop, in which case we still need to insert the usual fake edge so that the null export happens. This fixes shaders that end with an infinite loop that discards. Reviewers: arsenm, nhaehnle, critson Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D71192 2019-12-09 19:04:00 +08:00			`; IR: UnifiedReturnBlock:`
			`; IR-NEXT: call void @llvm.amdgcn.exp.f32(i32 9, i32 0, float undef, float undef, float undef, float undef, i1 true, i1 true)`
Update phis in AMDGPUUnifyDivergentExitNodes Original patch https://reviews.llvm.org/D63659 from Steven Perron <stevenperron@google.com> The pass AMDGPUUnifyDivergentExitNodes does not update the phi nodes in the successors of blocks that is splits. This is fixed by calling BasicBlock::splitBasicBlock to split the block instead of doing it manually. This does extra work because a new conditional branch is created in BB which is immediately replaced, but I think the simplicity is worth it. It also helps make the code more future proof in case other things need to be updated. llvm-svn: 364342 2019-06-26 02:55:16 +08:00			`; IR-NEXT: ret void`
			`;`
			`.entry:`
			`br label %.loopexit`

			`.loopexit: ; preds = %n28, %.entry`
			`br label %n28`

			`n28: ; preds = %.loopexit, %n28`
			`%.01 = phi float [ 0.000000e+00, %.loopexit ], [ %n29, %n28 ]`
			`%n29 = fadd float %.01, 1.0`
			`%n30 = fcmp ogt float %n29, 4.000000e+00`
			`br i1 %n30, label %.loopexit, label %n28`

			`n31: ; preds =`
			`ret void`
			`}`