forked from OSchip/llvm-project
[AMDGPU] Unroll more to eliminate phis and conditions
Increase the threshold for unrolling a loop which contains an "if" statement whose condition is defined by a PHI belonging to the loop. This may help to eliminate the if region and potentially even the PHI itself, saving on both divergence and registers used for the PHI. Add a small bonus for each such "if" statement. Differential Revision: https://reviews.llvm.org/D31693 llvm-svn: 299779
This commit is contained in:
parent
e4c8b9b78c
commit
478b81982f
|
@ -32,13 +32,37 @@ using namespace llvm;
|
|||
static cl::opt<unsigned> UnrollThresholdPrivate(
|
||||
"amdgpu-unroll-threshold-private",
|
||||
cl::desc("Unroll threshold for AMDGPU if private memory used in a loop"),
|
||||
cl::init(2000), cl::Hidden);
|
||||
cl::init(2500), cl::Hidden);
|
||||
|
||||
// Hidden command-line option "amdgpu-unroll-threshold-local": per its
// description, the unroll threshold used when local memory is used in a loop.
// Defaults to 1000.
static cl::opt<unsigned> UnrollThresholdLocal(
    "amdgpu-unroll-threshold-local", cl::init(1000), cl::Hidden,
    cl::desc("Unroll threshold for AMDGPU if local memory used in a loop"));
|
||||
|
||||
// Hidden command-line option "amdgpu-unroll-threshold-if": per its
// description, the amount by which the unroll threshold is incremented for
// each "if" statement inside a loop. Defaults to 150.
static cl::opt<unsigned> UnrollThresholdIf(
  "amdgpu-unroll-threshold-if",
  // The description is split into adjacent literals only to stay within 80
  // columns; the resulting string is unchanged.
  cl::desc("Unroll threshold increment for AMDGPU for each if statement "
           "inside loop"),
  cl::init(150), cl::Hidden);
|
||||
|
||||
static bool dependsOnLocalPhi(const Loop *L, const Value *Cond,
|
||||
unsigned Depth = 0) {
|
||||
const Instruction *I = dyn_cast<Instruction>(Cond);
|
||||
if (!I)
|
||||
return false;
|
||||
|
||||
for (const Value *V : I->operand_values()) {
|
||||
if (!L->contains(I))
|
||||
continue;
|
||||
if (const PHINode *PHI = dyn_cast<PHINode>(V)) {
|
||||
if (none_of(L->getSubLoops(), [PHI](const Loop* SubLoop) {
|
||||
return SubLoop->contains(PHI); }))
|
||||
return true;
|
||||
} else if (Depth < 10 && dependsOnLocalPhi(L, V, Depth+1))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L,
|
||||
TTI::UnrollingPreferences &UP) {
|
||||
UP.Threshold = 300; // Twice the default.
|
||||
|
@ -57,7 +81,33 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L,
|
|||
const DataLayout &DL = BB->getModule()->getDataLayout();
|
||||
unsigned LocalGEPsSeen = 0;
|
||||
|
||||
if (any_of(L->getSubLoops(), [BB](const Loop* SubLoop) {
|
||||
return SubLoop->contains(BB); }))
|
||||
continue; // Block belongs to an inner loop.
|
||||
|
||||
for (const Instruction &I : *BB) {
|
||||
|
||||
// Unroll a loop which contains an "if" statement whose condition
|
||||
// is defined by a PHI belonging to the loop. This may help to eliminate
|
||||
// if region and potentially even PHI itself, saving on both divergence
|
||||
// and registers used for the PHI.
|
||||
// Add a small bonus for each such "if" statement.
|
||||
if (const BranchInst *Br = dyn_cast<BranchInst>(&I)) {
|
||||
if (UP.Threshold < MaxBoost && Br->isConditional()) {
|
||||
if (L->isLoopExiting(Br->getSuccessor(0)) ||
|
||||
L->isLoopExiting(Br->getSuccessor(1)))
|
||||
continue;
|
||||
if (dependsOnLocalPhi(L, Br->getCondition())) {
|
||||
UP.Threshold += UnrollThresholdIf;
|
||||
DEBUG(dbgs() << "Set unroll threshold " << UP.Threshold
|
||||
<< " for loop:\n" << *L << " due to " << *Br << '\n');
|
||||
if (UP.Threshold >= MaxBoost)
|
||||
return;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I);
|
||||
if (!GEP)
|
||||
continue;
|
||||
|
@ -128,7 +178,7 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L,
|
|||
UP.Threshold = Threshold;
|
||||
DEBUG(dbgs() << "Set unroll threshold " << Threshold << " for loop:\n"
|
||||
<< *L << " due to " << *GEP << '\n');
|
||||
if (UP.Threshold == MaxBoost)
|
||||
if (UP.Threshold >= MaxBoost)
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -64,3 +64,37 @@ loop.inc:
|
|||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check that a loop with an if inside is completely unrolled to eliminate the phi and the if
|
||||
|
||||
; CHECK-LABEL: @unroll_for_if
|
||||
; CHECK: entry:
|
||||
; CHECK-NEXT: getelementptr
|
||||
; CHECK-NEXT: store
|
||||
; CHECK-NEXT: getelementptr
|
||||
; CHECK-NEXT: store
|
||||
; CHECK-NOT: br
|
||||
define amdgpu_kernel void @unroll_for_if(i32* %a) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %entry, %for.inc
|
||||
%i1 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
|
||||
%and = and i32 %i1, 1
|
||||
%tobool = icmp eq i32 %and, 0
|
||||
br i1 %tobool, label %for.inc, label %if.then
|
||||
|
||||
if.then: ; preds = %for.body
|
||||
%0 = sext i32 %i1 to i64
|
||||
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %0
|
||||
store i32 0, i32* %arrayidx, align 4
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %for.body, %if.then
|
||||
%inc = add nuw nsw i32 %i1, 1
|
||||
%cmp = icmp ult i32 %inc, 48
|
||||
br i1 %cmp, label %for.body, label %for.end
|
||||
|
||||
for.end: ; preds = %for.inc
|
||||
ret void
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue