[AMDGPU] Tune inlining parameters for AMDGPU target (part 2)

Summary:
Most of IR instructions got better code size estimations after commit 47a5c36b.
So default parameters values should be updated to improve inlining and
unrolling for the target.

Reviewers: rampitec, arsenm

Reviewed By: rampitec

Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, zzheng, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D70391
This commit is contained in:
dfukalov 2019-11-18 16:42:34 +03:00
parent 6de85095ed
commit 6fd11b14f6
5 changed files with 11 additions and 4 deletions

View File

@ -39,7 +39,7 @@ using namespace llvm;
#define DEBUG_TYPE "inline"
static cl::opt<int>
ArgAllocaCost("amdgpu-inline-arg-alloca-cost", cl::Hidden, cl::init(1500),
ArgAllocaCost("amdgpu-inline-arg-alloca-cost", cl::Hidden, cl::init(4000),
cl::desc("Cost of alloca argument"));
// If the amount of scratch memory to eliminate exceeds our ability to allocate

View File

@ -57,7 +57,7 @@ using namespace llvm;
static cl::opt<unsigned> UnrollThresholdPrivate(
"amdgpu-unroll-threshold-private",
cl::desc("Unroll threshold for AMDGPU if private memory used in a loop"),
cl::init(2000), cl::Hidden);
cl::init(2700), cl::Hidden);
static cl::opt<unsigned> UnrollThresholdLocal(
"amdgpu-unroll-threshold-local",

View File

@ -204,7 +204,7 @@ public:
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const;
unsigned getInliningThresholdMultiplier() { return 9; }
unsigned getInliningThresholdMultiplier() { return 11; }
int getInlinerVectorBonusPercent() { return 0; }

View File

@ -28,8 +28,15 @@ if.end: ; preds = %if.then, %entry
define coldcc void @foo_private_ptr2(float addrspace(5)* nocapture %p1, float addrspace(5)* nocapture %p2) {
entry:
%tmp1 = load float, float addrspace(5)* %p1, align 4
%cmp = fcmp ogt float %tmp1, 1.000000e+00
br i1 %cmp, label %if.then, label %if.end
if.then:
%div = fdiv float 2.000000e+00, %tmp1
store float %div, float addrspace(5)* %p2, align 4
br label %if.end
if.end:
ret void
}

View File

@ -1,4 +1,4 @@
; RUN: opt -data-layout=A5 -mtriple=amdgcn-unknown-amdhsa -loop-unroll -S -amdgpu-unroll-threshold-private=12000 %s | FileCheck %s
; RUN: opt -data-layout=A5 -mtriple=amdgcn-unknown-amdhsa -loop-unroll -S %s | FileCheck %s
; Check that we full unroll loop to be able to eliminate alloca
; CHECK-LABEL: @non_invariant_ind