forked from OSchip/llvm-project
[NewPM] Disable non-trivial loop-unswitch on targets with divergence
Unswitching a loop on a non-trivial divergent branch is expensive since it serializes the execution of both versions of the loop. But identifying a divergent branch needs divergence analysis, which is a function level analysis. The legacy pass manager handles this dependency by isolating such a loop transform and rerunning the required function analyses. This functionality is currently missing in the new pass manager, and there is no safe way for the SimpleLoopUnswitch pass to depend on DivergenceAnalysis. So we conservatively assume that all non-trivial branches are divergent if the target has divergence. Reviewed By: tra Differential Revision: https://reviews.llvm.org/D98958
This commit is contained in:
parent
1e56e8717f
commit
b92c8c22b9
|
@ -2901,10 +2901,20 @@ static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
|
|||
return true;
|
||||
}
|
||||
|
||||
// If we're not doing non-trivial unswitching, we're done. We both accept
|
||||
// a parameter but also check a local flag that can be used for testing
|
||||
// a debugging.
|
||||
if (!NonTrivial && !EnableNonTrivialUnswitch)
|
||||
// Check whether we should continue with non-trivial conditions.
|
||||
// EnableNonTrivialUnswitch: Global variable that forces non-trivial
|
||||
// unswitching for testing and debugging.
|
||||
// NonTrivial: Parameter that enables non-trivial unswitching for this
|
||||
// invocation of the transform. But this should be allowed only
|
||||
// for targets without branch divergence.
|
||||
//
|
||||
// FIXME: If divergence analysis becomes available to a loop
|
||||
// transform, we should allow unswitching for non-trivial uniform
|
||||
// branches even on targets that have divergence.
|
||||
// https://bugs.llvm.org/show_bug.cgi?id=48819
|
||||
bool ContinueWithNonTrivial =
|
||||
EnableNonTrivialUnswitch || (NonTrivial && !TTI.hasBranchDivergence());
|
||||
if (!ContinueWithNonTrivial)
|
||||
return false;
|
||||
|
||||
// Skip non-trivial unswitching for optsize functions.
|
||||
|
|
|
@ -1,47 +1,4 @@
|
|||
; RUN: opt -mtriple=amdgcn-- -O3 -S -enable-new-pm=0 %s | FileCheck %s
|
||||
|
||||
; This fails with the new pass manager:
|
||||
; https://bugs.llvm.org/show_bug.cgi?id=48819
|
||||
|
||||
; Check that loop unswitch happened and condition hoisted out of the loop.
|
||||
; Condition is uniform so all targets should perform unswitching.
|
||||
|
||||
; CHECK-LABEL: {{^}}define amdgpu_kernel void @uniform_unswitch
|
||||
; CHECK: entry:
|
||||
; CHECK-NEXT: [[LOOP_COND:%[a-z0-9]+]] = icmp
|
||||
; CHECK-NEXT: [[IF_COND:%[a-z0-9]+]] = icmp eq i32 %x, 123456
|
||||
; CHECK-NEXT: and i1 [[LOOP_COND]], [[IF_COND]]
|
||||
; CHECK-NEXT: br i1
|
||||
|
||||
define amdgpu_kernel void @uniform_unswitch(i32 * nocapture %out, i32 %n, i32 %x) {
|
||||
entry:
|
||||
%cmp6 = icmp sgt i32 %n, 0
|
||||
br i1 %cmp6, label %for.body.lr.ph, label %for.cond.cleanup
|
||||
|
||||
for.body.lr.ph: ; preds = %entry
|
||||
%cmp1 = icmp eq i32 %x, 123456
|
||||
br label %for.body
|
||||
|
||||
for.cond.cleanup.loopexit: ; preds = %for.inc
|
||||
br label %for.cond.cleanup
|
||||
|
||||
for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
|
||||
ret void
|
||||
|
||||
for.body: ; preds = %for.inc, %for.body.lr.ph
|
||||
%i.07 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
|
||||
br i1 %cmp1, label %if.then, label %for.inc
|
||||
|
||||
if.then: ; preds = %for.body
|
||||
%arrayidx = getelementptr inbounds i32, i32 * %out, i32 %i.07
|
||||
store i32 %i.07, i32 * %arrayidx, align 4
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %for.body, %if.then
|
||||
%inc = add nuw nsw i32 %i.07, 1
|
||||
%exitcond = icmp eq i32 %inc, %n
|
||||
br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
|
||||
}
|
||||
; RUN: opt -mtriple=amdgcn-- -O3 -S %s | FileCheck %s
|
||||
|
||||
; Check that loop unswitch does not happen if condition is divergent.
|
||||
|
||||
|
|
|
@ -0,0 +1,53 @@
|
|||
; RUN: opt -mtriple=amdgcn-- -O3 -S %s | FileCheck %s
|
||||
; XFAIL: *
|
||||
|
||||
; Check that loop unswitch happened and condition hoisted out of the loop.
|
||||
; Condition is uniform so even targets with divergence should perform unswitching.
|
||||
|
||||
; This fails with the new pass manager:
|
||||
; https://bugs.llvm.org/show_bug.cgi?id=48819
|
||||
; The correct behaviour (allow uniform non-trivial branches to be
|
||||
; unswitched on all targets) requires access to the function-level
|
||||
; divergence analysis from a loop transform, which is currently not
|
||||
; supported in the new pass manager.
|
||||
|
||||
; CHECK-LABEL: {{^}}define amdgpu_kernel void @uniform_unswitch
|
||||
; CHECK: entry:
|
||||
; CHECK-NEXT: [[LOOP_COND:%[a-z0-9]+]] = icmp
|
||||
; CHECK-NEXT: [[IF_COND:%[a-z0-9]+]] = icmp eq i32 %x, 123456
|
||||
; CHECK-NEXT: and i1 [[LOOP_COND]], [[IF_COND]]
|
||||
; CHECK-NEXT: br i1
|
||||
|
||||
define amdgpu_kernel void @uniform_unswitch(i32 * nocapture %out, i32 %n, i32 %x) {
|
||||
entry:
|
||||
%cmp6 = icmp sgt i32 %n, 0
|
||||
br i1 %cmp6, label %for.body.lr.ph, label %for.cond.cleanup
|
||||
|
||||
for.body.lr.ph: ; preds = %entry
|
||||
%cmp1 = icmp eq i32 %x, 123456
|
||||
br label %for.body
|
||||
|
||||
for.cond.cleanup.loopexit: ; preds = %for.inc
|
||||
br label %for.cond.cleanup
|
||||
|
||||
for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
|
||||
ret void
|
||||
|
||||
for.body: ; preds = %for.inc, %for.body.lr.ph
|
||||
%i.07 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
|
||||
br i1 %cmp1, label %if.then, label %for.inc
|
||||
|
||||
if.then: ; preds = %for.body
|
||||
%arrayidx = getelementptr inbounds i32, i32 * %out, i32 %i.07
|
||||
store i32 %i.07, i32 * %arrayidx, align 4
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %for.body, %if.then
|
||||
%inc = add nuw nsw i32 %i.07, 1
|
||||
%exitcond = icmp eq i32 %inc, %n
|
||||
br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
|
||||
}
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
Loading…
Reference in New Issue