llvm-project/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu

// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
// RUN:     -fcuda-is-device -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple nvptx \
// RUN:     -fcuda-is-device -emit-llvm -o - %s | FileCheck %s \
// RUN:     -check-prefix=NAMD
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \
// RUN:     -verify -o - %s | FileCheck -check-prefix=NAMD %s

#include "Inputs/cuda.h"

// Empty kernel annotated with amdgpu_flat_work_group_size(32, 64). The amdgcn
// run requires it to be emitted as an amdgpu_kernel definition and captures
// its attribute group, which is matched near the end of this file against
// "amdgpu-flat-work-group-size"="32,64".
__attribute__((amdgpu_flat_work_group_size(32, 64))) // expected-no-diagnostics
__global__ void flat_work_group_size_32_64() {
// CHECK: define amdgpu_kernel void @_Z26flat_work_group_size_32_64v() [[FLAT_WORK_GROUP_SIZE_32_64:#[0-9]+]]
}
// Empty kernel annotated with amdgpu_waves_per_eu(2). The amdgcn run requires
// it to be emitted as an amdgpu_kernel definition and captures its attribute
// group, which is matched near the end of this file against
// "amdgpu-waves-per-eu"="2".
__attribute__((amdgpu_waves_per_eu(2))) // expected-no-diagnostics
__global__ void waves_per_eu_2() {
// CHECK: define amdgpu_kernel void @_Z14waves_per_eu_2v() [[WAVES_PER_EU_2:#[0-9]+]]
}
// Empty kernel annotated with amdgpu_num_sgpr(32). The amdgcn run requires it
// to be emitted as an amdgpu_kernel definition and captures its attribute
// group, which is matched near the end of this file against
// "amdgpu-num-sgpr"="32".
__attribute__((amdgpu_num_sgpr(32))) // expected-no-diagnostics
__global__ void num_sgpr_32() {
// CHECK: define amdgpu_kernel void @_Z11num_sgpr_32v() [[NUM_SGPR_32:#[0-9]+]]
}
// Empty kernel annotated with amdgpu_num_vgpr(64). The amdgcn run requires it
// to be emitted as an amdgpu_kernel definition and captures its attribute
// group, which is matched near the end of this file against
// "amdgpu-num-vgpr"="64".
__attribute__((amdgpu_num_vgpr(64))) // expected-no-diagnostics
__global__ void num_vgpr_64() {
// CHECK: define amdgpu_kernel void @_Z11num_vgpr_64v() [[NUM_VGPR_64:#[0-9]+]]
}

// Make sure the amdgpu kernel attributes are silently accepted on other
// targets and that none of their IR attribute strings appear in the output.
// NAMD-NOT: "amdgpu-flat-work-group-size"
// NAMD-NOT: "amdgpu-waves-per-eu"
// NAMD-NOT: "amdgpu-num-vgpr"
// NAMD-NOT: "amdgpu-num-sgpr"

// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" 
// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { convergent noinline nounwind optnone "amdgpu-waves-per-eu"="2"
// CHECK-DAG: attributes [[NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-num-sgpr"="32" 
// CHECK-DAG: attributes [[NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-num-vgpr"="64"
[CUDA][HIP] Allow CUDA __global__ functions to have amdgpu kernel attributes

There are HIP applications e.g. Tensorflow 1.3 using amdgpu kernel attributes, however currently they are only allowed on OpenCL kernel functions. This patch will allow amdgpu kernel attributes to be applied to CUDA/HIP __global__ functions.

Differential Revision: https://reviews.llvm.org/D47958
llvm-svn: 334561
2018-06-13 07:58:59 +08:00
			`// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \`
			`// RUN: -fcuda-is-device -emit-llvm -o - %s \| FileCheck %s`
			`// RUN: %clang_cc1 -triple nvptx \`
			`// RUN: -fcuda-is-device -emit-llvm -o - %s \| FileCheck %s \`
			`// RUN: -check-prefix=NAMD`
			`// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \`
			`// RUN: -verify -o - %s \| FileCheck -check-prefix=NAMD %s`

			`#include "Inputs/cuda.h"`

			`__attribute__((amdgpu_flat_work_group_size(32, 64))) // expected-no-diagnostics`
			`__global__ void flat_work_group_size_32_64() {`
			`// CHECK: define amdgpu_kernel void @_Z26flat_work_group_size_32_64v() [[FLAT_WORK_GROUP_SIZE_32_64:#[0-9]+]]`
			`}`
			`__attribute__((amdgpu_waves_per_eu(2))) // expected-no-diagnostics`
			`__global__ void waves_per_eu_2() {`
			`// CHECK: define amdgpu_kernel void @_Z14waves_per_eu_2v() [[WAVES_PER_EU_2:#[0-9]+]]`
			`}`
			`__attribute__((amdgpu_num_sgpr(32))) // expected-no-diagnostics`
			`__global__ void num_sgpr_32() {`
			`// CHECK: define amdgpu_kernel void @_Z11num_sgpr_32v() [[NUM_SGPR_32:#[0-9]+]]`
			`}`
			`__attribute__((amdgpu_num_vgpr(64))) // expected-no-diagnostics`
			`__global__ void num_vgpr_64() {`
			`// CHECK: define amdgpu_kernel void @_Z11num_vgpr_64v() [[NUM_VGPR_64:#[0-9]+]]`
			`}`

			`// Make sure this is silently accepted on other targets.`
			`// NAMD-NOT: "amdgpu-flat-work-group-size"`
			`// NAMD-NOT: "amdgpu-waves-per-eu"`
			`// NAMD-NOT: "amdgpu-num-vgpr"`
			`// NAMD-NOT: "amdgpu-num-sgpr"`

			`// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64"`
			`// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { convergent noinline nounwind optnone "amdgpu-waves-per-eu"="2"`
			`// CHECK-DAG: attributes [[NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-num-sgpr"="32"`
			`// CHECK-DAG: attributes [[NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-num-vgpr"="64"`