From 4306f2086fe838591f2d1fe041ef73b643aa99ee Mon Sep 17 00:00:00 2001 From: Yaxun Liu Date: Fri, 20 Apr 2018 17:01:03 +0000 Subject: [PATCH] [CUDA] Set LLVM calling convention for CUDA kernel Some targets need special LLVM calling convention for CUDA kernel. This patch does that through a TargetCodeGenInfo hook. It only affects amdgcn target. Patch by Greg Rodgers. Revised and lit tests added by Yaxun Liu. Differential Revision: https://reviews.llvm.org/D45223 llvm-svn: 330447 --- clang/lib/CodeGen/CodeGenModule.cpp | 3 ++ clang/lib/CodeGen/TargetInfo.cpp | 6 ++++ clang/lib/CodeGen/TargetInfo.h | 2 ++ clang/test/CodeGenCUDA/kernel-amdgcn.cu | 41 +++++++++++++++++++++++++ 4 files changed, 52 insertions(+) create mode 100644 clang/test/CodeGenCUDA/kernel-amdgcn.cu diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 707b826418aa..063b9be4cd02 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -3627,6 +3627,9 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD, MaybeHandleStaticInExternC(D, Fn); + if (D->hasAttr()) + getTargetCodeGenInfo().setCUDAKernelCallingConvention(Fn); + maybeSetTrivialComdat(*D, *Fn); CodeGenFunction(*this).GenerateCode(D, Fn, FI); diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index 5e842fabbf8b..99e4b0de3e91 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -7637,6 +7637,7 @@ public: llvm::Function *BlockInvokeFunc, llvm::Value *BlockLiteral) const override; bool shouldEmitStaticExternCAliases() const override; + void setCUDAKernelCallingConvention(llvm::Function *F) const override; }; } @@ -7772,6 +7773,11 @@ bool AMDGPUTargetCodeGenInfo::shouldEmitStaticExternCAliases() const { return false; } +void AMDGPUTargetCodeGenInfo::setCUDAKernelCallingConvention( + llvm::Function *F) const { + F->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); +} + //===----------------------------------------------------------------------===// // SPARC v8 ABI Implementation. // Based on the SPARC Compliance Definition version 2.4.1. diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h index 24bd731419af..5c19c7141243 100644 --- a/clang/lib/CodeGen/TargetInfo.h +++ b/clang/lib/CodeGen/TargetInfo.h @@ -301,6 +301,8 @@ public: /// mangled name of functions declared within an extern "C" region and marked /// as 'used', and having internal linkage. virtual bool shouldEmitStaticExternCAliases() const { return true; } + + virtual void setCUDAKernelCallingConvention(llvm::Function *F) const {} }; } // namespace CodeGen diff --git a/clang/test/CodeGenCUDA/kernel-amdgcn.cu b/clang/test/CodeGenCUDA/kernel-amdgcn.cu new file mode 100644 index 000000000000..ffa6c9549f07 --- /dev/null +++ b/clang/test/CodeGenCUDA/kernel-amdgcn.cu @@ -0,0 +1,41 @@ +// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm %s -o - | FileCheck %s +#include "Inputs/cuda.h" + +// CHECK: define amdgpu_kernel void @_ZN1A6kernelEv +class A { +public: + static __global__ void kernel(){} +}; + +// CHECK: define void @_Z10non_kernelv +__device__ void non_kernel(){} + +// CHECK: define amdgpu_kernel void @_Z6kerneli +__global__ void kernel(int x) { + non_kernel(); +} + +// CHECK: define amdgpu_kernel void @_Z11EmptyKernelIvEvv +template +__global__ void EmptyKernel(void) {} + +struct Dummy { + /// Type definition of the EmptyKernel kernel entry point + typedef void (*EmptyKernelPtr)(); + EmptyKernelPtr Empty() { return EmptyKernel; } +}; + +// CHECK: define amdgpu_kernel void @_Z15template_kernelI1AEvT_ +template +__global__ void template_kernel(T x) {} + +void launch(void *f); + +int main() { + Dummy D; + launch((void*)A::kernel); + launch((void*)kernel); + launch((void*)template_kernel); + launch((void*)D.Empty()); + return 0; +}