forked from OSchip/llvm-project
[AMDGPU] Fix -amdgpu-inline-arg-alloca-cost
Before D94153 this threshold was in pre-scaled units. After D94153 the inlining threshold multiplier is no longer applied to this portion of the threshold. Restore the threshold by applying the multiplier. Differential Revision: https://reviews.llvm.org/D98362
This commit is contained in:
parent
f9e2a62cc5
commit
b7b99b0799
|
@ -1577,10 +1577,11 @@ void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) {
|
|||
}
|
||||
}
|
||||
|
||||
Threshold += TTI.adjustInliningThreshold(&Call);
|
||||
|
||||
// Finally, take the target-specific inlining threshold multiplier into
|
||||
// account.
|
||||
Threshold *= TTI.getInliningThresholdMultiplier();
|
||||
Threshold += TTI.adjustInliningThreshold(&Call);
|
||||
|
||||
SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
|
||||
VectorBonus = Threshold * VectorBonusPercent / 100;
|
||||
|
|
|
@ -0,0 +1,22 @@
|
|||
; RUN: opt -mtriple=amdgcn--amdhsa -S -passes=inline -inline-threshold=0 -debug-only=inline-cost < %s 2>&1 | FileCheck %s
|
||||
|
||||
; REQUIRES: asserts
|
||||
|
||||
target datalayout = "A5"
|
||||
|
||||
; Verify that the cost from -amdgpu-inline-arg-alloca-cost is properly added to the threshold.
|
||||
|
||||
; CHECK: NumAllocaArgs: 1
|
||||
; CHECK: Threshold: 66000
|
||||
|
||||
; Callee for the test: takes a single nocapture pointer argument in address
; space 5 and returns immediately without using it.
define void @use_private_ptr_arg(float addrspace(5)* nocapture %p) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; Caller kernel for the test: allocates a [64 x float] array in address space 5
; and passes a pointer to its first element to @use_private_ptr_arg, so the
; call site has one argument derived from an alloca.
define amdgpu_kernel void @test_inliner_pvt_ptr(float addrspace(1)* nocapture %a, i32 %n) {
|
||||
entry:
|
||||
%pvt_arr = alloca [64 x float], align 4, addrspace(5)
|
||||
%to.ptr = getelementptr inbounds [64 x float], [64 x float] addrspace(5)* %pvt_arr, i32 0, i32 0
|
||||
call void @use_private_ptr_arg(float addrspace(5)* %to.ptr)
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue