[AMDGPU] Fix -amdgpu-inline-arg-alloca-cost

Before D94153, this threshold was in pre-scaled units.
After D94153, the inlining threshold multiplier is no longer
applied to this portion of the threshold. Restore the
threshold by applying the multiplier.

Differential Revision: https://reviews.llvm.org/D98362
This commit is contained in:
Stanislav Mekhanoshin 2021-03-12 09:19:46 -08:00
parent f9e2a62cc5
commit b7b99b0799
2 changed files with 24 additions and 1 deletions

View File

@ -1577,10 +1577,11 @@ void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) {
}
}
Threshold += TTI.adjustInliningThreshold(&Call);
// Finally, take the target-specific inlining threshold multiplier into
// account.
Threshold *= TTI.getInliningThresholdMultiplier();
Threshold += TTI.adjustInliningThreshold(&Call);
SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
VectorBonus = Threshold * VectorBonusPercent / 100;

View File

@ -0,0 +1,22 @@
; RUN: opt -mtriple=amdgcn--amdhsa -S -passes=inline -inline-threshold=0 -debug-only=inline-cost < %s 2>&1 | FileCheck %s
; REQUIRES: asserts
target datalayout = "A5"
; Verify that the cost from -amdgpu-inline-arg-alloca-cost is properly added to the threshold.
; CHECK: NumAllocaArgs: 1
; CHECK: Threshold: 66000
; Callee for the inlining test: takes a single pointer argument in address
; space 5 and returns immediately without using it.
define void @use_private_ptr_arg(float addrspace(5)* nocapture %p) {
ret void
}
; Kernel caller for the inlining test: allocates a 64-element float array in
; address space 5 and passes a pointer to its first element to
; @use_private_ptr_arg, so the call site has one argument derived from an
; alloca. The kernel parameters %a and %n are not used in the body.
define amdgpu_kernel void @test_inliner_pvt_ptr(float addrspace(1)* nocapture %a, i32 %n) {
entry:
; Stack array placed in address space 5 (the alloca address space selected by
; the "A5" datalayout string in this test).
%pvt_arr = alloca [64 x float], align 4, addrspace(5)
; Pointer to element 0 of the array; this is the value handed to the callee.
%to.ptr = getelementptr inbounds [64 x float], [64 x float] addrspace(5)* %pvt_arr, i32 0, i32 0
call void @use_private_ptr_arg(float addrspace(5)* %to.ptr)
ret void
}