[AMDGPU] Enable runtime unroll for LDS

We want to unroll loops that use LDS even when the trip count is only known
at runtime, so that the LDS operations can be combined.

Differential Revision: https://reviews.llvm.org/D75293
This commit is contained in:
Stanislav Mekhanoshin 2020-02-27 12:36:40 -08:00
parent 2301a18c1c
commit 6b813f2762
2 changed files with 42 additions and 0 deletions

View File

@ -69,6 +69,11 @@ static cl::opt<unsigned> UnrollThresholdIf(
cl::desc("Unroll threshold increment for AMDGPU for each if statement inside loop"),
cl::init(150), cl::Hidden);
// Hidden command-line switch "amdgpu-unroll-runtime-local"; defaults to true.
// AMDGPUTTIImpl::getUnrollingPreferences copies its value into UP.Runtime for
// loops it reports as using LDS, so passing false here keeps that assignment
// from enabling runtime unrolling.
static cl::opt<bool> UnrollRuntimeLocal(
"amdgpu-unroll-runtime-local",
cl::desc("Allow runtime unroll for AMDGPU if local memory used in a loop"),
cl::init(true), cl::Hidden);
static cl::opt<bool> UseLegacyDA(
"amdgpu-use-legacy-divergence-analysis",
cl::desc("Enable legacy divergence analysis for AMDGPU"),
@ -177,6 +182,9 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
(!isa<GlobalVariable>(GEP->getPointerOperand()) &&
!isa<Argument>(GEP->getPointerOperand())))
continue;
LLVM_DEBUG(dbgs() << "Allow unroll runtime for loop:\n"
<< *L << " due to LDS use.\n");
UP.Runtime = UnrollRuntimeLocal;
}
// Check if GEP depends on a value defined by this loop itself.

View File

@ -99,3 +99,37 @@ for.inc: ; preds = %for.body, %if.then
for.end: ; preds = %for.cond
ret void
}
; Check that runtime unroll is enabled for local memory references
; CHECK-LABEL: @local_memory_runtime
; CHECK: loop.header:
; CHECK: load i32, i32 addrspace(3)*
; CHECK: load i32, i32 addrspace(3)*
; CHECK: br i1
; CHECK: loop.header.epil
; CHECK: load i32, i32 addrspace(3)*
; CHECK: ret
; Kernel that copies i32 elements from the addrspace(3) buffer %lds to the
; addrspace(1) buffer %out, one element per loop iteration. The exit test
; compares against the argument %n, so the trip count is not a compile-time
; constant; this is the runtime-trip-count case the test exercises.
define amdgpu_kernel void @local_memory_runtime(i32 addrspace(1)* %out, i32 addrspace(3)* %lds, i32 %n) {
entry:
br label %loop.header
loop.header:
; Induction variable: 0 when entered from %entry, %inc on the back edge
; from %loop.inc.
%counter = phi i32 [0, %entry], [%inc, %loop.inc]
br label %loop.body
loop.body:
; The addrspace(3) load below is the local memory reference; the check lines
; above expect it to appear twice in loop.header and once more in the
; loop.header.epil block after unrolling.
%ptr_lds = getelementptr i32, i32 addrspace(3)* %lds, i32 %counter
%val = load i32, i32 addrspace(3)* %ptr_lds
; Write the loaded value to the same index of the addrspace(1) output buffer.
%ptr_out = getelementptr i32, i32 addrspace(1)* %out, i32 %counter
store i32 %val, i32 addrspace(1)* %ptr_out
br label %loop.inc
loop.inc:
%inc = add i32 %counter, 1
; Signed >= comparison on the pre-increment %counter (not %inc): the loop
; exits once %counter >= %n, otherwise it branches back to the header.
%cond = icmp sge i32 %counter, %n
br i1 %cond, label %exit, label %loop.header
exit:
ret void
}