[DivergenceAnalysis] Add methods for querying divergence at use
Summary:
The existing isDivergent(Value) methods query whether a value is
divergent at its definition. However, even if a value is uniform at its
definition, a use of it in another basic block can be divergent because
of divergent control flow between the def and the use.
This patch adds new isDivergent(Use) methods to DivergenceAnalysis,
LegacyDivergenceAnalysis and GPUDivergenceAnalysis.
This might allow D63953 or other similar workarounds to be removed.
Reviewers: alex-t, nhaehnle, arsenm, rtaylor, rampitec, simoll, jingyue
Reviewed By: nhaehnle
Subscribers: jfb, jvesely, wdng, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D65141
llvm-svn: 367218
2019-07-29 18:22:09 +08:00
|
|
|
; First invocation: llc for amdgcn / gfx900 with -amdgpu-atomic-optimizations=true,
; output piped to FileCheck.
; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-atomic-optimizations=true < %s | FileCheck %s
|
|
|
|
; Second invocation: same flags, additionally passing -use-gpu-divergence-analysis.
; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-atomic-optimizations=true < %s -use-gpu-divergence-analysis | FileCheck %s
|
|
|
|
|
|
|
|
; i32 global in address space 3 with an undef initializer; @reducible indexes it
; via getelementptr.
@local = addrspace(3) global i32 undef
|
|
|
|
|
2019-07-29 19:48:17 +08:00
|
|
|
; Kernel taking a single i32 argument %x. It runs a one-block loop whose counter
; %i starts at 0 and grows by 1 per iteration, repeating while %i <u %x, and then
; performs an atomic add of %x in the exit block.
; The FileCheck directives inside the body require that no "dpp" text follows
; the "reducible:" label in the llc output.
define amdgpu_kernel void @reducible(i32 %x) {
|
[DivergenceAnalysis] Add methods for querying divergence at use
Summary:
The existing isDivergent(Value) methods query whether a value is
divergent at its definition. However even if a value is uniform at its
definition, a use of it in another basic block can be divergent because
of divergent control flow between the def and the use.
This patch adds new isDivergent(Use) methods to DivergenceAnalysis,
LegacyDivergenceAnalysis and GPUDivergenceAnalysis.
This might allow D63953 or other similar workarounds to be removed.
Reviewers: alex-t, nhaehnle, arsenm, rtaylor, rampitec, simoll, jingyue
Reviewed By: nhaehnle
Subscribers: jfb, jvesely, wdng, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D65141
llvm-svn: 367218
2019-07-29 18:22:09 +08:00
|
|
|
; CHECK-LABEL: reducible:
|
|
|
|
; CHECK-NOT: dpp
|
|
|
|
entry:
|
|
|
|
br label %loop
|
|
|
|
loop:
|
|
|
|
; Loop counter: 0 on entry from %entry, %i1 on the back edge.
%i = phi i32 [ 0, %entry ], [ %i1, %loop ]
|
|
|
|
; Address of element %i of @local. Defined inside the loop; its only use in
; this function is the atomicrmw in %exit.
%gep = getelementptr i32, i32 addrspace(3)* @local, i32 %i
|
|
|
|
; Unsigned comparison: keep looping while %i < %x.
%cond = icmp ult i32 %i, %x
|
|
|
|
%i1 = add i32 %i, 1
|
|
|
|
br i1 %cond, label %loop, label %exit
|
|
|
|
exit:
|
|
|
|
; Atomic add of %x at the address computed by the final loop iteration; the
; returned value %old is not used anywhere in this function.
%old = atomicrmw add i32 addrspace(3)* %gep, i32 %x acq_rel
|
|
|
|
ret void
|
|
|
|
}
|