forked from OSchip/llvm-project
40 lines
1.3 KiB
Plaintext
40 lines
1.3 KiB
Plaintext
|
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
// RUN: -emit-llvm %s -o - | FileCheck -check-prefix=AMDGCN %s
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda- -fcuda-is-device \
// RUN: -emit-llvm %s -o - | FileCheck -check-prefix=NVPTX %s
|
||
|
#include "Inputs/cuda.h"
|
||
|
|
||
|
// Aggregate used as the by-value kernel argument throughout this test:
// a single array of 32 ints.
struct A {
  int a[32];
};
|
||
|
|
||
|
// Free-function kernel that takes A by value. The body is intentionally
// empty; only the emitted signature matters. The two check lines below pin
// the definition expected from each target's compile.
// AMDGCN: define amdgpu_kernel void @_Z6kernel1A(%struct.A %x.coerce)
// NVPTX: define void @_Z6kernel1A(%struct.A* byval align 4 %x)
__global__ void kernel(A x) {
}
|
||
|
|
||
|
// Holder for kernels declared as static member functions, so the test also
// covers the class-scoped mangled names.
class Kernel {
public:
  // Non-template static member kernel taking A by value; the check lines
  // pin the definition expected from each target's compile.
  // AMDGCN: define amdgpu_kernel void @_ZN6Kernel12memberKernelE1A(%struct.A %x.coerce)
  // NVPTX: define void @_ZN6Kernel12memberKernelE1A(%struct.A* byval align 4 %x)
  static __global__ void memberKernel(A x){}

  // Template static member kernel. Its checks are placed in test(), next to
  // the expression that names the <A> specialization.
  template<typename T> static __global__ void templateMemberKernel(T x) {}
};
|
||
|
|
||
|
|
||
|
// Free-function template kernel taking its argument by value. Its checks are
// placed in test(), next to the expression that names the <A> specialization.
template <typename T>
__global__ void templateKernel(T x) {}
|
||
|
|
||
|
// Declared only; no definition is visible in this file. test() hands it the
// address of each template kernel specialization, cast to void*.
void launch(void*);
|
||
|
|
||
|
// Takes the address of each template kernel specialized on A and passes it to
// launch(). These are the only uses of the specializations in this file, so
// the definitions matched by the check lines below presumably come from these
// references.
void test() {
  // Not referenced again in this function.
  Kernel K;

  // Free-function template kernel, specialized on A.
  // AMDGCN: define amdgpu_kernel void @_Z14templateKernelI1AEvT_(%struct.A %x.coerce)
  // NVPTX: define void @_Z14templateKernelI1AEvT_(%struct.A* byval align 4 %x)
  launch((void*)templateKernel<A>);

  // Static member template kernel, specialized on A.
  // AMDGCN: define amdgpu_kernel void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A %x.coerce)
  // NVPTX: define void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A* byval align 4 %x)
  launch((void*)Kernel::templateMemberKernel<A>);
}
|