forked from OSchip/llvm-project
[AMDGPU] Disable forceful inline of non-kernel functions which use LDS.
Since LDS uses within non-kernel functions are now handled by the LowerModuleLDS pass, we do *NOT* need to *forcefully* inline non-kernel functions just because they use LDS. Do forceful inlining only when the LowerModuleLDS pass is not enabled. It is enabled by default. Reviewed By: JonChesterfield Differential Revision: https://reviews.llvm.org/D100481
This commit is contained in:
parent
90ae4d9030
commit
4973b0c4e7
|
@ -120,10 +120,10 @@ static bool alwaysInlineImpl(Module &M, bool GlobalOpt) {
|
||||||
for (GlobalVariable &GV : M.globals()) {
|
for (GlobalVariable &GV : M.globals()) {
|
||||||
// TODO: Region address
|
// TODO: Region address
|
||||||
unsigned AS = GV.getAddressSpace();
|
unsigned AS = GV.getAddressSpace();
|
||||||
if (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS)
|
if ((AS == AMDGPUAS::REGION_ADDRESS) ||
|
||||||
continue;
|
(AS == AMDGPUAS::LOCAL_ADDRESS &&
|
||||||
|
!AMDGPUTargetMachine::EnableLowerModuleLDS))
|
||||||
recursivelyVisitUsers(GV, FuncsToAlwaysInline);
|
recursivelyVisitUsers(GV, FuncsToAlwaysInline);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!AMDGPUTargetMachine::EnableFunctionCalls || StressCalls) {
|
if (!AMDGPUTargetMachine::EnableFunctionCalls || StressCalls) {
|
||||||
|
|
|
@ -193,10 +193,10 @@ static cl::opt<bool> EnableStructurizerWorkarounds(
|
||||||
cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::init(true),
|
cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::init(true),
|
||||||
cl::Hidden);
|
cl::Hidden);
|
||||||
|
|
||||||
static cl::opt<bool>
|
static cl::opt<bool, true> EnableLowerModuleLDS(
|
||||||
EnableLowerModuleLDS("amdgpu-enable-lower-module-lds", cl::Hidden,
|
"amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"),
|
||||||
cl::desc("Enable lower module lds pass"),
|
cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true),
|
||||||
cl::init(true));
|
cl::Hidden);
|
||||||
|
|
||||||
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
|
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
|
||||||
// Register the target
|
// Register the target
|
||||||
|
@ -393,6 +393,7 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
|
||||||
bool AMDGPUTargetMachine::EnableLateStructurizeCFG = false;
|
bool AMDGPUTargetMachine::EnableLateStructurizeCFG = false;
|
||||||
bool AMDGPUTargetMachine::EnableFunctionCalls = false;
|
bool AMDGPUTargetMachine::EnableFunctionCalls = false;
|
||||||
bool AMDGPUTargetMachine::EnableFixedFunctionABI = false;
|
bool AMDGPUTargetMachine::EnableFixedFunctionABI = false;
|
||||||
|
bool AMDGPUTargetMachine::EnableLowerModuleLDS = true;
|
||||||
|
|
||||||
AMDGPUTargetMachine::~AMDGPUTargetMachine() = default;
|
AMDGPUTargetMachine::~AMDGPUTargetMachine() = default;
|
||||||
|
|
||||||
|
|
|
@ -35,6 +35,7 @@ public:
|
||||||
static bool EnableLateStructurizeCFG;
|
static bool EnableLateStructurizeCFG;
|
||||||
static bool EnableFunctionCalls;
|
static bool EnableFunctionCalls;
|
||||||
static bool EnableFixedFunctionABI;
|
static bool EnableFixedFunctionABI;
|
||||||
|
static bool EnableLowerModuleLDS;
|
||||||
|
|
||||||
AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
|
AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
|
||||||
StringRef FS, TargetOptions Options,
|
StringRef FS, TargetOptions Options,
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-function-calls -amdgpu-stress-function-calls < %s | FileCheck -check-prefix=GCN %s
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-function-calls -amdgpu-stress-function-calls -amdgpu-enable-lower-module-lds=false < %s | FileCheck -check-prefix=GCN %s
|
||||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-stress-function-calls < %s | FileCheck -check-prefix=GCN %s
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-stress-function-calls -amdgpu-enable-lower-module-lds=false < %s | FileCheck -check-prefix=GCN %s
|
||||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefix=GCN %s
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-enable-lower-module-lds=false < %s | FileCheck -check-prefix=GCN %s
|
||||||
|
|
||||||
@lds0 = addrspace(3) global i32 undef, align 4
|
@lds0 = addrspace(3) global i32 undef, align 4
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-always-inline %s | FileCheck --check-prefix=ALL %s
|
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-always-inline -amdgpu-enable-lower-module-lds=false %s | FileCheck --check-prefix=ALL %s
|
||||||
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-always-inline %s | FileCheck --check-prefix=ALL %s
|
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-always-inline -amdgpu-enable-lower-module-lds=false %s | FileCheck --check-prefix=ALL %s
|
||||||
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-stress-function-calls -amdgpu-always-inline %s | FileCheck --check-prefix=ALL %s
|
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-stress-function-calls -amdgpu-always-inline -amdgpu-enable-lower-module-lds=false %s | FileCheck --check-prefix=ALL %s
|
||||||
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-stress-function-calls -passes=amdgpu-always-inline %s | FileCheck --check-prefix=ALL %s
|
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-stress-function-calls -passes=amdgpu-always-inline -amdgpu-enable-lower-module-lds=false %s | FileCheck --check-prefix=ALL %s
|
||||||
|
|
||||||
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
|
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue