forked from OSchip/llvm-project
[CUDA] Set LLVM calling convention for CUDA kernel
Some targets need special LLVM calling convention for CUDA kernel. This patch does that through a TargetCodeGenInfo hook. It only affects amdgcn target. Patch by Greg Rodgers. Revised and lit tests added by Yaxun Liu. Differential Revision: https://reviews.llvm.org/D45223 llvm-svn: 330447
This commit is contained in:
parent
e268304122
commit
4306f2086f
|
@ -3627,6 +3627,9 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD,
|
|||
|
||||
MaybeHandleStaticInExternC(D, Fn);
|
||||
|
||||
if (D->hasAttr<CUDAGlobalAttr>())
|
||||
getTargetCodeGenInfo().setCUDAKernelCallingConvention(Fn);
|
||||
|
||||
maybeSetTrivialComdat(*D, *Fn);
|
||||
|
||||
CodeGenFunction(*this).GenerateCode(D, Fn, FI);
|
||||
|
|
|
@ -7637,6 +7637,7 @@ public:
|
|||
llvm::Function *BlockInvokeFunc,
|
||||
llvm::Value *BlockLiteral) const override;
|
||||
bool shouldEmitStaticExternCAliases() const override;
|
||||
void setCUDAKernelCallingConvention(llvm::Function *F) const override;
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -7772,6 +7773,11 @@ bool AMDGPUTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
|
|||
return false;
|
||||
}
|
||||
|
||||
void AMDGPUTargetCodeGenInfo::setCUDAKernelCallingConvention(
|
||||
llvm::Function *F) const {
|
||||
F->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SPARC v8 ABI Implementation.
|
||||
// Based on the SPARC Compliance Definition version 2.4.1.
|
||||
|
|
|
@ -301,6 +301,8 @@ public:
|
|||
/// mangled name of functions declared within an extern "C" region and marked
|
||||
/// as 'used', and having internal linkage.
|
||||
virtual bool shouldEmitStaticExternCAliases() const { return true; }
|
||||
|
||||
virtual void setCUDAKernelCallingConvention(llvm::Function *F) const {}
|
||||
};
|
||||
|
||||
} // namespace CodeGen
|
||||
|
|
|
@ -0,0 +1,41 @@
|
|||
// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm %s -o - | FileCheck %s
|
||||
#include "Inputs/cuda.h"
|
||||
|
||||
// CHECK: define amdgpu_kernel void @_ZN1A6kernelEv
|
||||
class A {
|
||||
public:
|
||||
static __global__ void kernel(){}
|
||||
};
|
||||
|
||||
// CHECK: define void @_Z10non_kernelv
|
||||
__device__ void non_kernel(){}
|
||||
|
||||
// CHECK: define amdgpu_kernel void @_Z6kerneli
|
||||
__global__ void kernel(int x) {
|
||||
non_kernel();
|
||||
}
|
||||
|
||||
// CHECK: define amdgpu_kernel void @_Z11EmptyKernelIvEvv
|
||||
template <typename T>
|
||||
__global__ void EmptyKernel(void) {}
|
||||
|
||||
struct Dummy {
|
||||
/// Type definition of the EmptyKernel kernel entry point
|
||||
typedef void (*EmptyKernelPtr)();
|
||||
EmptyKernelPtr Empty() { return EmptyKernel<void>; }
|
||||
};
|
||||
|
||||
// CHECK: define amdgpu_kernel void @_Z15template_kernelI1AEvT_
|
||||
template<class T>
|
||||
__global__ void template_kernel(T x) {}
|
||||
|
||||
void launch(void *f);
|
||||
|
||||
int main() {
|
||||
Dummy D;
|
||||
launch((void*)A::kernel);
|
||||
launch((void*)kernel);
|
||||
launch((void*)template_kernel<A>);
|
||||
launch((void*)D.Empty());
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue