llvm-project/llvm/test/Analysis/DivergenceAnalysis/NVPTX/irreducible.ll

; Test driver: opt prints the divergence analysis result for this module (with
; -use-gpu-divergence-analysis enabled) and FileCheck verifies that output
; against the check directives embedded in this file.
; RUN: opt %s -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s

; The module is built for the 64-bit NVPTX (CUDA) target.
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"

; This test contains an unstructured loop.
;           +-------------- entry ----------------+
;           |                                     |
;           V                                     V
; i1 = phi(0, i3)                            i2 = phi(0, i3)
;     j1 = i1 + 1 ---> i3 = phi(j1, j2) <--- j2 = i2 + 2
;           ^                 |                   ^
;           |                 V                   |
;           +-------- switch (tid / i3) ----------+
;                             |
;                             V
;                        if (i3 == 5) // divergent
; The branch on (i3 == 5) is divergent because it is sync dependent on the
; switch over (tid / i3).
; Kernel containing a loop with two entry blocks, %loop_entry_1 and
; %loop_entry_2. %entry_cond picks which one is taken from %entry, and
; %loop_latch can jump back to either of them.
; Returns 0 if the loop is left with %i3 == 5, and 1 otherwise.
define i32 @unstructured_loop(i1 %entry_cond) {
; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'unstructured_loop'
entry:
  ; Read the tid.x special register through the NVVM intrinsic.
  %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
  br i1 %entry_cond, label %loop_entry_1, label %loop_entry_2
; First loop entry: the counter starts at 0 when coming from %entry, or carries
; %i3 over when %loop_latch jumps back here; it is then advanced by 1.
loop_entry_1:
  %i1 = phi i32 [ 0, %entry ], [ %i3, %loop_latch ]
  %j1 = add i32 %i1, 1
  br label %loop_body
; Second loop entry: same shape as the first, but the counter is advanced by 2.
loop_entry_2:
  %i2 = phi i32 [ 0, %entry ], [ %i3, %loop_latch ]
  %j2 = add i32 %i2, 2
  br label %loop_body
; Join point of both entries: %i3 is whichever incremented counter arrived.
loop_body:
  %i3 = phi i32 [ %j1, %loop_entry_1 ], [ %j2, %loop_entry_2 ]
  br label %loop_latch
; Latch: the quotient %tid / %i3 selects the next block. 1 re-enters through
; %loop_entry_1, 2 re-enters through %loop_entry_2, and any other value leaves
; the loop via %branch.
loop_latch:
  %div = sdiv i32 %tid, %i3
  switch i32 %div, label %branch [ i32 1, label %loop_entry_1
                                   i32 2, label %loop_entry_2 ]
; Loop exit: %cmp is computed from %i3 alone, yet the directive after the branch
; expects the analysis to report this branch as divergent.
branch:
  %cmp = icmp eq i32 %i3, 5
  br i1 %cmp, label %then, label %else
; CHECK: DIVERGENT: br i1 %cmp,
then:
  ret i32 0
else:
  ret i32 1
}

; Declarations of NVVM special-register read intrinsics. Within the function
; visible in this file only tid.x is actually called; tid.y, tid.z and laneid
; are declared but have no visible use here.
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
declare i32 @llvm.nvvm.read.ptx.sreg.laneid()

; NVVM annotation that tags @unstructured_loop with the "kernel" property
; (value 1).
!nvvm.annotations = !{!0}
!0 = !{i32 (i1)* @unstructured_loop, !"kernel", i32 1}
[DA] GPUDivergenceAnalysis for unstructured GPU kernels Summary: This is patch #3 of the new DivergenceAnalysis <https://lists.llvm.org/pipermail/llvm-dev/2018-May/123606.html> The GPUDivergenceAnalysis is intended to eventually supersede the existing LegacyDivergenceAnalysis. The existing LegacyDivergenceAnalysis produces incorrect results on unstructured Control-Flow Graphs: <https://bugs.llvm.org/show_bug.cgi?id=37185> This patch adds the option -use-gpu-divergence-analysis to the LegacyDivergenceAnalysis to turn it into a transparent wrapper for the GPUDivergenceAnalysis. Reviewers: nhaehnle Reviewed By: nhaehnle Subscribers: jholewinski, jvesely, jfb, llvm-commits, alex-t, sameerds, arsenm, nhaehnle Differential Revision: https://reviews.llvm.org/D53493 llvm-svn: 348048 2018-12-01 06:55:20 +08:00			`; RUN: opt %s -analyze -divergence -use-gpu-divergence-analysis \| FileCheck %s`

			`target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"`
			`target triple = "nvptx64-nvidia-cuda"`

			`; This test contains an unstructured loop.`
			`; +-------------- entry ----------------+`
			`; \| \|`
			`; V V`
			`; i1 = phi(0, i3) i2 = phi(0, i3)`
			`; j1 = i1 + 1 ---> i3 = phi(j1, j2) <--- j2 = i2 + 2`
			`; ^ \| ^`
			`; \| V \|`
			`; +-------- switch (tid / i3) ----------+`
			`; \|`
			`; V`
			`; if (i3 == 5) // divergent`
			`; because sync dependent on (tid / i3).`
			`define i32 @unstructured_loop(i1 %entry_cond) {`
			`; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'unstructured_loop'`
			`entry:`
			`%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()`
			`br i1 %entry_cond, label %loop_entry_1, label %loop_entry_2`
			`loop_entry_1:`
			`%i1 = phi i32 [ 0, %entry ], [ %i3, %loop_latch ]`
			`%j1 = add i32 %i1, 1`
			`br label %loop_body`
			`loop_entry_2:`
			`%i2 = phi i32 [ 0, %entry ], [ %i3, %loop_latch ]`
			`%j2 = add i32 %i2, 2`
			`br label %loop_body`
			`loop_body:`
			`%i3 = phi i32 [ %j1, %loop_entry_1 ], [ %j2, %loop_entry_2 ]`
			`br label %loop_latch`
			`loop_latch:`
			`%div = sdiv i32 %tid, %i3`
			`switch i32 %div, label %branch [ i32 1, label %loop_entry_1`
			`i32 2, label %loop_entry_2 ]`
			`branch:`
			`%cmp = icmp eq i32 %i3, 5`
			`br i1 %cmp, label %then, label %else`
			`; CHECK: DIVERGENT: br i1 %cmp,`
			`then:`
			`ret i32 0`
			`else:`
			`ret i32 1`
			`}`

			`declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()`
			`declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()`
			`declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()`
			`declare i32 @llvm.nvvm.read.ptx.sreg.laneid()`

			`!nvvm.annotations = !{!0}`
			`!0 = !{i32 (i1)* @unstructured_loop, !"kernel", i32 1}`