forked from OSchip/llvm-project
Revert "[AMDGPU] Handle functions in llvm's global ctors and dtors list"
This reverts commit d42e70b3d3.
This commit is contained in:
parent
238139be09
commit
dce35ef104
|
@ -3142,37 +3142,6 @@ same *vendor-name*.
|
|||
a register allocator
|
||||
created spill
|
||||
location.
|
||||
".kind" string The kind of the kernel
|
||||
with the following
|
||||
values:
|
||||
|
||||
"normal"
|
||||
Regular kernels.
|
||||
|
||||
"init"
|
||||
These kernels must be
|
||||
invoked after loading
|
||||
the containing code
|
||||
object and must
|
||||
complete before any
|
||||
normal and fini
|
||||
kernels in the same
|
||||
code object are
|
||||
invoked.
|
||||
|
||||
"fini"
|
||||
These kernels must be
|
||||
invoked before
|
||||
unloading the
|
||||
containing code object
|
||||
and after all init and
|
||||
normal kernels in the
|
||||
same code object have
|
||||
been invoked and
|
||||
completed.
|
||||
|
||||
If omitted, "normal" is
|
||||
assumed.
|
||||
=================================== ============== ========= ================================
|
||||
|
||||
..
|
||||
|
|
|
@ -114,10 +114,6 @@ ModulePass *createAMDGPUFixFunctionBitcastsPass();
|
|||
void initializeAMDGPUFixFunctionBitcastsPass(PassRegistry &);
|
||||
extern char &AMDGPUFixFunctionBitcastsID;
|
||||
|
||||
ModulePass *createAMDGPUCtorDtorLoweringPass();
|
||||
void initializeAMDGPUCtorDtorLoweringPass(PassRegistry &);
|
||||
extern char &AMDGPUCtorDtorLoweringID;
|
||||
|
||||
FunctionPass *createAMDGPULowerKernelArgumentsPass();
|
||||
void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &);
|
||||
extern char &AMDGPULowerKernelArgumentsID;
|
||||
|
|
|
@ -1,91 +0,0 @@
|
|||
//===-- AMDGPUCtorDtorLowering.cpp - Handle global ctors and dtors --------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
///
|
||||
/// \file
|
||||
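/// This pass creates one init kernel and one fini kernel that call the
/// functions listed in llvm.global_ctors and llvm.global_dtors respectively,
/// and tags each kernel with the attribute used to emit its metadata kind.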
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPU.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/GlobalVariable.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/IR/Value.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Transforms/Utils/ModuleUtils.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
#define DEBUG_TYPE "amdgpu-lower-ctor-dtor"
|
||||
|
||||
namespace {
|
||||
class AMDGPUCtorDtorLowering final : public ModulePass {
|
||||
bool runOnModule(Module &M) override;
|
||||
|
||||
public:
|
||||
Function *createInitOrFiniKernelFunction(Module &M, bool IsCtor) {
|
||||
StringRef InitOrFiniKernelName = "amdgcn.device.init";
|
||||
if (!IsCtor)
|
||||
InitOrFiniKernelName = "amdgcn.device.fini";
|
||||
|
||||
Function *InitOrFiniKernel = Function::createWithDefaultAttr(
|
||||
FunctionType::get(Type::getVoidTy(M.getContext()), false),
|
||||
GlobalValue::InternalLinkage, 0, InitOrFiniKernelName, &M);
|
||||
BasicBlock *InitOrFiniKernelBB =
|
||||
BasicBlock::Create(M.getContext(), "", InitOrFiniKernel);
|
||||
ReturnInst::Create(M.getContext(), InitOrFiniKernelBB);
|
||||
|
||||
InitOrFiniKernel->setCallingConv(CallingConv::AMDGPU_KERNEL);
|
||||
if (IsCtor)
|
||||
InitOrFiniKernel->addFnAttr("device-init");
|
||||
else
|
||||
InitOrFiniKernel->addFnAttr("device-fini");
|
||||
return InitOrFiniKernel;
|
||||
}
|
||||
|
||||
void createInitOrFiniKernel(Module &M, GlobalVariable *GV, bool IsCtor) {
|
||||
if (!GV)
|
||||
return;
|
||||
ConstantArray *GA = cast<ConstantArray>(GV->getInitializer());
|
||||
if (GA->getNumOperands() == 0)
|
||||
return;
|
||||
Function *InitOrFiniKernel = createInitOrFiniKernelFunction(M, IsCtor);
|
||||
IRBuilder<> IRB(InitOrFiniKernel->getEntryBlock().getTerminator());
|
||||
for (Value *V : GA->operands()) {
|
||||
auto *CS = cast<ConstantStruct>(V);
|
||||
if (Function *F = dyn_cast<Function>(CS->getOperand(1))) {
|
||||
FunctionCallee Ctor =
|
||||
M.getOrInsertFunction(F->getName(), IRB.getVoidTy());
|
||||
IRB.CreateCall(Ctor);
|
||||
}
|
||||
}
|
||||
appendToUsed(M, {InitOrFiniKernel});
|
||||
}
|
||||
|
||||
static char ID;
|
||||
AMDGPUCtorDtorLowering() : ModulePass(ID) {}
|
||||
};
|
||||
} // End anonymous namespace
|
||||
|
||||
char AMDGPUCtorDtorLowering::ID = 0;
|
||||
char &llvm::AMDGPUCtorDtorLoweringID = AMDGPUCtorDtorLowering::ID;
|
||||
INITIALIZE_PASS(AMDGPUCtorDtorLowering, DEBUG_TYPE,
|
||||
"Lower ctors and dtors for AMDGPU", false, false)
|
||||
|
||||
ModulePass *llvm::createAMDGPUCtorDtorLoweringPass() {
|
||||
return new AMDGPUCtorDtorLowering();
|
||||
}
|
||||
|
||||
bool AMDGPUCtorDtorLowering::runOnModule(Module &M) {
|
||||
createInitOrFiniKernel(M, M.getGlobalVariable("llvm.global_ctors"),
|
||||
/*IsCtor =*/true);
|
||||
createInitOrFiniKernel(M, M.getGlobalVariable("llvm.global_dtors"),
|
||||
/*IsCtor =*/false);
|
||||
return false;
|
||||
}
|
|
@ -665,10 +665,6 @@ void MetadataStreamerV3::emitKernelAttrs(const Function &Func,
|
|||
Func.getFnAttribute("runtime-handle").getValueAsString().str(),
|
||||
/*Copy=*/true);
|
||||
}
|
||||
if(Func.hasFnAttribute("device-init"))
|
||||
Kern[".kind"] = Kern.getDocument()->getNode("init");
|
||||
else if(Func.hasFnAttribute("device-fini"))
|
||||
Kern[".kind"] = Kern.getDocument()->getNode("fini");
|
||||
}
|
||||
|
||||
void MetadataStreamerV3::emitKernelArgs(const Function &Func,
|
||||
|
|
|
@ -349,7 +349,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
|
|||
initializeSIOptimizeVGPRLiveRangePass(*PR);
|
||||
initializeSILoadStoreOptimizerPass(*PR);
|
||||
initializeAMDGPUFixFunctionBitcastsPass(*PR);
|
||||
initializeAMDGPUCtorDtorLoweringPass(*PR);
|
||||
initializeAMDGPUAlwaysInlinePass(*PR);
|
||||
initializeAMDGPUAttributorPass(*PR);
|
||||
initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
|
||||
|
@ -1015,7 +1014,6 @@ void AMDGPUPassConfig::addIRPasses() {
|
|||
disablePass(&PatchableFunctionID);
|
||||
|
||||
addPass(createAMDGPUPrintfRuntimeBinding());
|
||||
addPass(createAMDGPUCtorDtorLoweringPass());
|
||||
|
||||
// This must occur before inlining, as the inliner will not look through
|
||||
// bitcast calls.
|
||||
|
|
|
@ -53,7 +53,6 @@ add_llvm_target(AMDGPUCodeGen
|
|||
AMDGPUCodeGenPrepare.cpp
|
||||
AMDGPUExportClustering.cpp
|
||||
AMDGPUFixFunctionBitcasts.cpp
|
||||
AMDGPUCtorDtorLowering.cpp
|
||||
AMDGPUFrameLowering.cpp
|
||||
AMDGPUHSAMetadataStreamer.cpp
|
||||
AMDGPUInstCombineIntrinsic.cpp
|
||||
|
|
|
@ -1,39 +0,0 @@
|
|||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
|
||||
|
||||
@llvm.global_ctors = appending addrspace(1) global [2 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 1, void ()* @foo, i8* null }, { i32, void ()*, i8* } { i32 1, void ()* @foo.5, i8* null }]
|
||||
|
||||
define internal void @foo() {
|
||||
ret void
|
||||
|
||||
}
|
||||
|
||||
define internal void @foo.5() {
|
||||
ret void
|
||||
|
||||
}
|
||||
|
||||
; CHECK: ---
|
||||
; CHECK: .kind: init
|
||||
; CHECK: .name: amdgcn.device.init
|
||||
|
||||
@llvm.global_dtors = appending addrspace(1) global [2 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 1, void ()* @bar, i8* null }, { i32, void ()*, i8* } { i32 1, void ()* @bar.5, i8* null }]
|
||||
|
||||
define internal void @bar() {
|
||||
ret void
|
||||
|
||||
}
|
||||
|
||||
define internal void @bar.5() {
|
||||
ret void
|
||||
|
||||
}
|
||||
|
||||
; CHECK: .kind: fini
|
||||
; CHECK: .name: amdgcn.device.fini
|
||||
|
||||
; PARSER: AMDGPU HSA Metadata Parser Test: PASS
|
|
@ -31,7 +31,6 @@
|
|||
; GCN-O0-NEXT: AMDGPU Printf lowering
|
||||
; GCN-O0-NEXT: FunctionPass Manager
|
||||
; GCN-O0-NEXT: Dominator Tree Construction
|
||||
; GCN-O0-NEXT: Lower ctors and dtors for AMDGPU
|
||||
; GCN-O0-NEXT: Fix function bitcasts for AMDGPU
|
||||
; GCN-O0-NEXT: FunctionPass Manager
|
||||
; GCN-O0-NEXT: Early propagate attributes from kernels to functions
|
||||
|
@ -166,7 +165,6 @@
|
|||
; GCN-O1-NEXT: AMDGPU Printf lowering
|
||||
; GCN-O1-NEXT: FunctionPass Manager
|
||||
; GCN-O1-NEXT: Dominator Tree Construction
|
||||
; GCN-O1-NEXT: Lower ctors and dtors for AMDGPU
|
||||
; GCN-O1-NEXT: Fix function bitcasts for AMDGPU
|
||||
; GCN-O1-NEXT: FunctionPass Manager
|
||||
; GCN-O1-NEXT: Early propagate attributes from kernels to functions
|
||||
|
@ -417,7 +415,6 @@
|
|||
; GCN-O1-OPTS-NEXT: AMDGPU Printf lowering
|
||||
; GCN-O1-OPTS-NEXT: FunctionPass Manager
|
||||
; GCN-O1-OPTS-NEXT: Dominator Tree Construction
|
||||
; GCN-O1-OPTS-NEXT: Lower ctors and dtors for AMDGPU
|
||||
; GCN-O1-OPTS-NEXT: Fix function bitcasts for AMDGPU
|
||||
; GCN-O1-OPTS-NEXT: FunctionPass Manager
|
||||
; GCN-O1-OPTS-NEXT: Early propagate attributes from kernels to functions
|
||||
|
@ -701,7 +698,6 @@
|
|||
; GCN-O2-NEXT: AMDGPU Printf lowering
|
||||
; GCN-O2-NEXT: FunctionPass Manager
|
||||
; GCN-O2-NEXT: Dominator Tree Construction
|
||||
; GCN-O2-NEXT: Lower ctors and dtors for AMDGPU
|
||||
; GCN-O2-NEXT: Fix function bitcasts for AMDGPU
|
||||
; GCN-O2-NEXT: FunctionPass Manager
|
||||
; GCN-O2-NEXT: Early propagate attributes from kernels to functions
|
||||
|
@ -987,7 +983,6 @@
|
|||
; GCN-O3-NEXT: AMDGPU Printf lowering
|
||||
; GCN-O3-NEXT: FunctionPass Manager
|
||||
; GCN-O3-NEXT: Dominator Tree Construction
|
||||
; GCN-O3-NEXT: Lower ctors and dtors for AMDGPU
|
||||
; GCN-O3-NEXT: Fix function bitcasts for AMDGPU
|
||||
; GCN-O3-NEXT: FunctionPass Manager
|
||||
; GCN-O3-NEXT: Early propagate attributes from kernels to functions
|
||||
|
|
|
@ -1,21 +0,0 @@
|
|||
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-ctor-dtor < %s | FileCheck %s
|
||||
|
||||
@llvm.global_ctors = appending addrspace(1) global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 1, void ()* @foo, i8* null }]
|
||||
@llvm.global_dtors = appending addrspace(1) global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 1, void ()* @bar, i8* null }]
|
||||
|
||||
; CHECK-LABEL: amdgpu_kernel void @amdgcn.device.init() #0
|
||||
; CHECK-NEXT: call void @foo
|
||||
|
||||
; CHECK-LABEL: amdgpu_kernel void @amdgcn.device.fini() #1
|
||||
; CHECK-NEXT: call void @bar
|
||||
|
||||
define internal void @foo() {
|
||||
ret void
|
||||
}
|
||||
|
||||
define internal void @bar() {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: attributes #0 = { "device-init" }
|
||||
; CHECK: attributes #1 = { "device-fini" }
|
|
@ -1,31 +0,0 @@
|
|||
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-ctor-dtor < %s | FileCheck %s
|
||||
|
||||
@llvm.global_ctors = appending addrspace(1) global [2 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 1, void ()* @foo, i8* null }, { i32, void ()*, i8* } { i32 1, void ()* @foo.5, i8* null }]
|
||||
@llvm.global_dtors = appending addrspace(1) global [2 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 1, void ()* @bar, i8* null }, { i32, void ()*, i8* } { i32 1, void ()* @bar.5, i8* null }]
|
||||
|
||||
; CHECK-LABEL: amdgpu_kernel void @amdgcn.device.init() #0
|
||||
; CHECK-NEXT: call void @foo
|
||||
; CHECK-NEXT: call void @foo.5
|
||||
|
||||
; CHECK-LABEL: amdgpu_kernel void @amdgcn.device.fini() #1
|
||||
; CHECK-NEXT: call void @bar
|
||||
; CHECK-NEXT: call void @bar.5
|
||||
|
||||
define internal void @foo() {
|
||||
ret void
|
||||
}
|
||||
|
||||
define internal void @bar() {
|
||||
ret void
|
||||
}
|
||||
|
||||
define internal void @foo.5() {
|
||||
ret void
|
||||
}
|
||||
|
||||
define internal void @bar.5() {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: attributes #0 = { "device-init" }
|
||||
; CHECK: attributes #1 = { "device-fini" }
|
|
@ -133,7 +133,6 @@ static_library("LLVMAMDGPUCodeGen") {
|
|||
"AMDGPUCodeGenPrepare.cpp",
|
||||
"AMDGPUExportClustering.cpp",
|
||||
"AMDGPUFixFunctionBitcasts.cpp",
|
||||
"AMDGPUCtorDtorLowering.cpp",
|
||||
"AMDGPUFrameLowering.cpp",
|
||||
"AMDGPUGlobalISelUtils.cpp",
|
||||
"AMDGPUHSAMetadataStreamer.cpp",
|
||||
|
|
Loading…
Reference in New Issue