forked from OSchip/llvm-project
[CUDA][HIP] Set kernel calling convention before arrange function
Currently clang set kernel calling convention for CUDA/HIP after arranging function, which causes incorrect kernel function type since it depends on calling convention. This patch moves setting kernel convention before arranging function. Differential Revision: https://reviews.llvm.org/D47733 llvm-svn: 334457
This commit is contained in:
parent
9deaf68ed1
commit
6c10a66ec7
|
@ -255,6 +255,16 @@ CodeGenTypes::arrangeCXXMethodType(const CXXRecordDecl *RD,
|
|||
FTP->getCanonicalTypeUnqualified().getAs<FunctionProtoType>(), MD);
|
||||
}
|
||||
|
||||
/// Set calling convention for CUDA/HIP kernel.
|
||||
static void setCUDAKernelCallingConvention(CanQualType &FTy, CodeGenModule &CGM,
|
||||
const FunctionDecl *FD) {
|
||||
if (FD->hasAttr<CUDAGlobalAttr>()) {
|
||||
const FunctionType *FT = FTy->getAs<FunctionType>();
|
||||
CGM.getTargetCodeGenInfo().setCUDAKernelCallingConvention(FT);
|
||||
FTy = FT->getCanonicalTypeUnqualified();
|
||||
}
|
||||
}
|
||||
|
||||
/// Arrange the argument and result information for a declaration or
|
||||
/// definition of the given C++ non-static member function. The
|
||||
/// member function must be an ordinary function, i.e. not a
|
||||
|
@ -264,7 +274,9 @@ CodeGenTypes::arrangeCXXMethodDeclaration(const CXXMethodDecl *MD) {
|
|||
assert(!isa<CXXConstructorDecl>(MD) && "wrong method for constructors!");
|
||||
assert(!isa<CXXDestructorDecl>(MD) && "wrong method for destructors!");
|
||||
|
||||
CanQual<FunctionProtoType> prototype = GetFormalType(MD);
|
||||
CanQualType FT = GetFormalType(MD).getAs<Type>();
|
||||
setCUDAKernelCallingConvention(FT, CGM, MD);
|
||||
auto prototype = FT.getAs<FunctionProtoType>();
|
||||
|
||||
if (MD->isInstance()) {
|
||||
// The abstract case is perfectly fine.
|
||||
|
@ -424,6 +436,7 @@ CodeGenTypes::arrangeFunctionDeclaration(const FunctionDecl *FD) {
|
|||
CanQualType FTy = FD->getType()->getCanonicalTypeUnqualified();
|
||||
|
||||
assert(isa<FunctionType>(FTy));
|
||||
setCUDAKernelCallingConvention(FTy, CGM, FD);
|
||||
|
||||
// When declaring a function without a prototype, always use a
|
||||
// non-variadic type.
|
||||
|
|
|
@ -3671,8 +3671,6 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD,
|
|||
|
||||
MaybeHandleStaticInExternC(D, Fn);
|
||||
|
||||
if (D->hasAttr<CUDAGlobalAttr>())
|
||||
getTargetCodeGenInfo().setCUDAKernelCallingConvention(Fn);
|
||||
|
||||
maybeSetTrivialComdat(*D, *Fn);
|
||||
|
||||
|
|
|
@ -7646,7 +7646,7 @@ public:
|
|||
llvm::Function *BlockInvokeFunc,
|
||||
llvm::Value *BlockLiteral) const override;
|
||||
bool shouldEmitStaticExternCAliases() const override;
|
||||
void setCUDAKernelCallingConvention(llvm::Function *F) const override;
|
||||
void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -7783,8 +7783,9 @@ bool AMDGPUTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
|
|||
}
|
||||
|
||||
void AMDGPUTargetCodeGenInfo::setCUDAKernelCallingConvention(
|
||||
llvm::Function *F) const {
|
||||
F->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
|
||||
const FunctionType *&FT) const {
|
||||
FT = getABIInfo().getContext().adjustFunctionType(
|
||||
FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel));
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -302,7 +302,7 @@ public:
|
|||
/// as 'used', and having internal linkage.
|
||||
virtual bool shouldEmitStaticExternCAliases() const { return true; }
|
||||
|
||||
virtual void setCUDAKernelCallingConvention(llvm::Function *F) const {}
|
||||
virtual void setCUDAKernelCallingConvention(const FunctionType *&FT) const {}
|
||||
};
|
||||
|
||||
} // namespace CodeGen
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
|
||||
// RUN: -emit-llvm %s -o - | FileCheck -check-prefix=AMDGCN %s
|
||||
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda- -fcuda-is-device \
|
||||
// RUN: -emit-llvm %s -o - | FileCheck -check-prefix=NVPTX %s
|
||||
#include "Inputs/cuda.h"
|
||||
|
||||
struct A {
|
||||
int a[32];
|
||||
};
|
||||
|
||||
// AMDGCN: define amdgpu_kernel void @_Z6kernel1A(%struct.A %x.coerce)
|
||||
// NVPTX: define void @_Z6kernel1A(%struct.A* byval align 4 %x)
|
||||
__global__ void kernel(A x) {
|
||||
}
|
||||
|
||||
class Kernel {
|
||||
public:
|
||||
// AMDGCN: define amdgpu_kernel void @_ZN6Kernel12memberKernelE1A(%struct.A %x.coerce)
|
||||
// NVPTX: define void @_ZN6Kernel12memberKernelE1A(%struct.A* byval align 4 %x)
|
||||
static __global__ void memberKernel(A x){}
|
||||
template<typename T> static __global__ void templateMemberKernel(T x) {}
|
||||
};
|
||||
|
||||
|
||||
template <typename T>
|
||||
__global__ void templateKernel(T x) {}
|
||||
|
||||
void launch(void*);
|
||||
|
||||
void test() {
|
||||
Kernel K;
|
||||
// AMDGCN: define amdgpu_kernel void @_Z14templateKernelI1AEvT_(%struct.A %x.coerce)
|
||||
// NVPTX: define void @_Z14templateKernelI1AEvT_(%struct.A* byval align 4 %x)
|
||||
launch((void*)templateKernel<A>);
|
||||
|
||||
// AMDGCN: define amdgpu_kernel void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A %x.coerce)
|
||||
// NVPTX: define void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A* byval align 4 %x)
|
||||
launch((void*)Kernel::templateMemberKernel<A>);
|
||||
}
|
Loading…
Reference in New Issue