forked from OSchip/llvm-project
[AMDGPU] Tune inlining parameters for AMDGPU target (part 2)
Summary:
Most IR instructions got better code size estimates after commit 47a5c36b.
So the default parameter values should be updated to improve inlining and
unrolling for the target.
Reviewers: rampitec, arsenm
Reviewed By: rampitec
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, zzheng, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70391
This commit is contained in:
parent
6de85095ed
commit
6fd11b14f6
|
@ -39,7 +39,7 @@ using namespace llvm;
|
|||
#define DEBUG_TYPE "inline"
|
||||
|
||||
static cl::opt<int>
|
||||
ArgAllocaCost("amdgpu-inline-arg-alloca-cost", cl::Hidden, cl::init(1500),
|
||||
ArgAllocaCost("amdgpu-inline-arg-alloca-cost", cl::Hidden, cl::init(4000),
|
||||
cl::desc("Cost of alloca argument"));
|
||||
|
||||
// If the amount of scratch memory to eliminate exceeds our ability to allocate
|
||||
|
|
|
@ -57,7 +57,7 @@ using namespace llvm;
|
|||
static cl::opt<unsigned> UnrollThresholdPrivate(
|
||||
"amdgpu-unroll-threshold-private",
|
||||
cl::desc("Unroll threshold for AMDGPU if private memory used in a loop"),
|
||||
cl::init(2000), cl::Hidden);
|
||||
cl::init(2700), cl::Hidden);
|
||||
|
||||
static cl::opt<unsigned> UnrollThresholdLocal(
|
||||
"amdgpu-unroll-threshold-local",
|
||||
|
|
|
@ -204,7 +204,7 @@ public:
|
|||
bool areInlineCompatible(const Function *Caller,
|
||||
const Function *Callee) const;
|
||||
|
||||
unsigned getInliningThresholdMultiplier() { return 9; }
|
||||
unsigned getInliningThresholdMultiplier() { return 11; }
|
||||
|
||||
int getInlinerVectorBonusPercent() { return 0; }
|
||||
|
||||
|
|
|
@ -28,8 +28,15 @@ if.end: ; preds = %if.then, %entry
|
|||
define coldcc void @foo_private_ptr2(float addrspace(5)* nocapture %p1, float addrspace(5)* nocapture %p2) {
|
||||
entry:
|
||||
%tmp1 = load float, float addrspace(5)* %p1, align 4
|
||||
%cmp = fcmp ogt float %tmp1, 1.000000e+00
|
||||
br i1 %cmp, label %if.then, label %if.end
|
||||
|
||||
if.then:
|
||||
%div = fdiv float 2.000000e+00, %tmp1
|
||||
store float %div, float addrspace(5)* %p2, align 4
|
||||
br label %if.end
|
||||
|
||||
if.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
; RUN: opt -data-layout=A5 -mtriple=amdgcn-unknown-amdhsa -loop-unroll -S -amdgpu-unroll-threshold-private=12000 %s | FileCheck %s
|
||||
; RUN: opt -data-layout=A5 -mtriple=amdgcn-unknown-amdhsa -loop-unroll -S %s | FileCheck %s
|
||||
|
||||
; Check that we full unroll loop to be able to eliminate alloca
|
||||
; CHECK-LABEL: @non_invariant_ind
|
||||
|
|
Loading…
Reference in New Issue