[AMDGPU] Unroll more to eliminate phis and conditions

Increase the threshold to unroll a loop which contains an "if" statement
whose condition is defined by a PHI belonging to the loop. This may help
to eliminate the if region and potentially even the PHI itself, saving on
both divergence and registers used for the PHI.

Add a small bonus for each such "if" statement.

Differential Revision: https://reviews.llvm.org/D31693

llvm-svn: 299779
This commit is contained in:
Stanislav Mekhanoshin 2017-04-07 16:26:28 +00:00
parent e4c8b9b78c
commit 478b81982f
2 changed files with 86 additions and 2 deletions

View File

@ -32,13 +32,37 @@ using namespace llvm;
static cl::opt<unsigned> UnrollThresholdPrivate(
"amdgpu-unroll-threshold-private",
cl::desc("Unroll threshold for AMDGPU if private memory used in a loop"),
cl::init(2000), cl::Hidden);
cl::init(2500), cl::Hidden);
// Command-line option "amdgpu-unroll-threshold-local": per its description,
// the unroll threshold for AMDGPU when local memory is used in a loop.
// Defaults to 1000 and is marked cl::Hidden.
static cl::opt<unsigned> UnrollThresholdLocal(
"amdgpu-unroll-threshold-local",
cl::desc("Unroll threshold for AMDGPU if local memory used in a loop"),
cl::init(1000), cl::Hidden);
// Command-line option "amdgpu-unroll-threshold-if": the amount added to the
// unroll threshold for each "if" statement inside a loop (see the option
// description). Defaults to 150 and is marked cl::Hidden.
static cl::opt<unsigned> UnrollThresholdIf(
"amdgpu-unroll-threshold-if",
cl::desc("Unroll threshold increment for AMDGPU for each if statement inside loop"),
cl::init(150), cl::Hidden);
static bool dependsOnLocalPhi(const Loop *L, const Value *Cond,
unsigned Depth = 0) {
const Instruction *I = dyn_cast<Instruction>(Cond);
if (!I)
return false;
for (const Value *V : I->operand_values()) {
if (!L->contains(I))
continue;
if (const PHINode *PHI = dyn_cast<PHINode>(V)) {
if (none_of(L->getSubLoops(), [PHI](const Loop* SubLoop) {
return SubLoop->contains(PHI); }))
return true;
} else if (Depth < 10 && dependsOnLocalPhi(L, V, Depth+1))
return true;
}
return false;
}
void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L,
TTI::UnrollingPreferences &UP) {
UP.Threshold = 300; // Twice the default.
@ -57,7 +81,33 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L,
const DataLayout &DL = BB->getModule()->getDataLayout();
unsigned LocalGEPsSeen = 0;
if (any_of(L->getSubLoops(), [BB](const Loop* SubLoop) {
return SubLoop->contains(BB); }))
continue; // Block belongs to an inner loop.
for (const Instruction &I : *BB) {
// Unroll a loop which contains an "if" statement whose condition is
// defined by a PHI belonging to the loop. This may help to eliminate the
// if region and potentially even the PHI itself, saving on both divergence
// and registers used for the PHI.
// Add a small bonus for each such "if" statement.
if (const BranchInst *Br = dyn_cast<BranchInst>(&I)) {
if (UP.Threshold < MaxBoost && Br->isConditional()) {
if (L->isLoopExiting(Br->getSuccessor(0)) ||
L->isLoopExiting(Br->getSuccessor(1)))
continue;
if (dependsOnLocalPhi(L, Br->getCondition())) {
UP.Threshold += UnrollThresholdIf;
DEBUG(dbgs() << "Set unroll threshold " << UP.Threshold
<< " for loop:\n" << *L << " due to " << *Br << '\n');
if (UP.Threshold >= MaxBoost)
return;
}
}
continue;
}
const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I);
if (!GEP)
continue;
@ -128,7 +178,7 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L,
UP.Threshold = Threshold;
DEBUG(dbgs() << "Set unroll threshold " << Threshold << " for loop:\n"
<< *L << " due to " << *GEP << '\n');
if (UP.Threshold == MaxBoost)
if (UP.Threshold >= MaxBoost)
return;
}
}

View File

@ -64,3 +64,37 @@ loop.inc:
exit:
ret void
}
; Check that a loop with an if inside is completely unrolled to eliminate the phi and the if
; CHECK-LABEL: @unroll_for_if
; CHECK: entry:
; CHECK-NEXT: getelementptr
; CHECK-NEXT: store
; CHECK-NEXT: getelementptr
; CHECK-NEXT: store
; CHECK-NOT: br
; Kernel with a counted loop (%i1 runs from 0 while %inc is below 48) whose
; body holds a conditional store guarded by the low bit of the induction phi.
define amdgpu_kernel void @unroll_for_if(i32* %a) {
entry:
br label %for.body
for.body: ; preds = %entry, %for.inc
; %i1 is the loop-local phi that the inner condition depends on.
%i1 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
%and = and i32 %i1, 1
%tobool = icmp eq i32 %and, 0
; When the low bit of %i1 is clear the store is skipped.
br i1 %tobool, label %for.inc, label %if.then
if.then: ; preds = %for.body
; Store 0 to a[%i1].
%0 = sext i32 %i1 to i64
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %0
store i32 0, i32* %arrayidx, align 4
br label %for.inc
for.inc: ; preds = %for.body, %if.then
%inc = add nuw nsw i32 %i1, 1
%cmp = icmp ult i32 %inc, 48
; Loop back-edge / exit test.
br i1 %cmp, label %for.body, label %for.end
for.end: ; preds = %for.inc
ret void
}