forked from OSchip/llvm-project
[OpenCL][CUDA][HIP][SYCL] Add norecurse
norecurse function attr indicates the function is not called recursively directly or indirectly. Add norecurse to OpenCL functions, SYCL functions in device compilation and CUDA/HIP kernels. Although there is LLVM pass adding norecurse to functions, it only works for whole-program compilation. Also FE adding norecurse can make that pass run faster since functions with norecurse do not need to be checked again. Differential Revision: https://reviews.llvm.org/D73651
This commit is contained in:
parent
20c5968e09
commit
fb44b9db95
|
@ -918,10 +918,20 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
|
||||||
// If we're in C++ mode and the function name is "main", it is guaranteed
|
// If we're in C++ mode and the function name is "main", it is guaranteed
|
||||||
// to be norecurse by the standard (3.6.1.3 "The function main shall not be
|
// to be norecurse by the standard (3.6.1.3 "The function main shall not be
|
||||||
// used within a program").
|
// used within a program").
|
||||||
if (getLangOpts().CPlusPlus)
|
//
|
||||||
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
|
// OpenCL C 2.0 v2.2-11 s6.9.i:
|
||||||
if (FD->isMain())
|
// Recursion is not supported.
|
||||||
Fn->addFnAttr(llvm::Attribute::NoRecurse);
|
//
|
||||||
|
// SYCL v1.2.1 s3.10:
|
||||||
|
// kernels cannot include RTTI information, exception classes,
|
||||||
|
// recursive code, virtual functions or make use of C++ libraries that
|
||||||
|
// are not compiled for the device.
|
||||||
|
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
|
||||||
|
if ((getLangOpts().CPlusPlus && FD->isMain()) || getLangOpts().OpenCL ||
|
||||||
|
getLangOpts().SYCLIsDevice ||
|
||||||
|
(getLangOpts().CUDA && FD->hasAttr<CUDAGlobalAttr>()))
|
||||||
|
Fn->addFnAttr(llvm::Attribute::NoRecurse);
|
||||||
|
}
|
||||||
|
|
||||||
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
|
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
|
||||||
if (FD->usesFPIntrin())
|
if (FD->usesFPIntrin())
|
||||||
|
|
|
@ -0,0 +1,15 @@
|
||||||
|
// REQUIRES: amdgpu-registered-target
|
||||||
|
// REQUIRES: nvptx-registered-target
|
||||||
|
|
||||||
|
// RUN: %clang_cc1 -triple nvptx-nvidia-cuda -fcuda-is-device \
|
||||||
|
// RUN: -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
|
||||||
|
|
||||||
|
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
|
||||||
|
// RUN: -emit-llvm -disable-llvm-passes -o - -x hip %s | FileCheck %s
|
||||||
|
|
||||||
|
#include "Inputs/cuda.h"
|
||||||
|
|
||||||
|
__global__ void kernel1(int a) {}
|
||||||
|
// CHECK: define{{.*}}@_Z7kernel1i{{.*}}#[[ATTR:[0-9]*]]
|
||||||
|
|
||||||
|
// CHECK: attributes #[[ATTR]] = {{.*}}norecurse
|
|
@ -48,16 +48,33 @@ __global__ void kernel() { lib_fn(); }
|
||||||
}
|
}
|
||||||
|
|
||||||
// The kernel and lib function should have the same attributes.
|
// The kernel and lib function should have the same attributes.
|
||||||
// CHECK: define void @kernel() [[attr:#[0-9]+]]
|
// CHECK: define void @kernel() [[kattr:#[0-9]+]]
|
||||||
// CHECK: define internal void @lib_fn() [[attr]]
|
// CHECK: define internal void @lib_fn() [[fattr:#[0-9]+]]
|
||||||
|
|
||||||
// FIXME: These -NOT checks do not work as intended and do not check on the same
|
// FIXME: These -NOT checks do not work as intended and do not check on the same
|
||||||
// line.
|
// line.
|
||||||
|
|
||||||
// Check the attribute list.
|
// Check the attribute list for kernel.
|
||||||
// CHECK: attributes [[attr]] = {
|
// CHECK: attributes [[kattr]] = {
|
||||||
|
|
||||||
// CHECK-SAME: convergent
|
// CHECK-SAME: convergent
|
||||||
|
// CHECK-SAME: norecurse
|
||||||
|
|
||||||
|
// FTZ-NOT: "denormal-fp-math"
|
||||||
|
|
||||||
|
// FTZ-SAME: "denormal-fp-math-f32"="preserve-sign,preserve-sign"
|
||||||
|
// NOFTZ-SAME: "denormal-fp-math-f32"="ieee,ieee"
|
||||||
|
|
||||||
|
// CHECK-SAME: "no-trapping-math"="true"
|
||||||
|
|
||||||
|
// FAST-SAME: "unsafe-fp-math"="true"
|
||||||
|
// NOFAST-NOT: "unsafe-fp-math"="true"
|
||||||
|
|
||||||
|
// Check the attribute list for lib_fn.
|
||||||
|
// CHECK: attributes [[fattr]] = {
|
||||||
|
|
||||||
|
// CHECK-SAME: convergent
|
||||||
|
// CHECK-NOT: norecurse
|
||||||
|
|
||||||
// FTZ-NOT: "denormal-fp-math"
|
// FTZ-NOT: "denormal-fp-math"
|
||||||
|
|
||||||
|
|
|
@ -162,33 +162,33 @@ kernel void default_kernel() {
|
||||||
// CHECK-NOT: "amdgpu-num-sgpr"="0"
|
// CHECK-NOT: "amdgpu-num-sgpr"="0"
|
||||||
// CHECK-NOT: "amdgpu-num-vgpr"="0"
|
// CHECK-NOT: "amdgpu-num-vgpr"="0"
|
||||||
|
|
||||||
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56"
|
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56"
|
||||||
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_64_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="64,64" "amdgpu-implicitarg-num-bytes"="56"
|
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_64_64]] = {{.*}} "amdgpu-flat-work-group-size"="64,64" "amdgpu-implicitarg-num-bytes"="56"
|
||||||
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_16_128]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="16,128" "amdgpu-implicitarg-num-bytes"="56"
|
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_16_128]] = {{.*}} "amdgpu-flat-work-group-size"="16,128" "amdgpu-implicitarg-num-bytes"="56"
|
||||||
|
|
||||||
// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-waves-per-eu"="2"
|
// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-waves-per-eu"="2"
|
||||||
|
|
||||||
// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-waves-per-eu"="2,4"
|
// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-waves-per-eu"="2,4"
|
||||||
// CHECK-DAG: attributes [[NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32"
|
// CHECK-DAG: attributes [[NUM_SGPR_32]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32"
|
||||||
// CHECK-DAG: attributes [[NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-vgpr"="64"
|
// CHECK-DAG: attributes [[NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-vgpr"="64"
|
||||||
|
|
||||||
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-waves-per-eu"="2"
|
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-waves-per-eu"="2"
|
||||||
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-waves-per-eu"="2,4"
|
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-waves-per-eu"="2,4"
|
||||||
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32"
|
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_SGPR_32]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32"
|
||||||
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-vgpr"="64"
|
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-vgpr"="64"
|
||||||
// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
|
// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_SGPR_32]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
|
||||||
// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
|
// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
|
||||||
// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
|
// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_SGPR_32]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
|
||||||
// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
|
// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
|
||||||
// CHECK-DAG: attributes [[NUM_SGPR_32_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64"
|
// CHECK-DAG: attributes [[NUM_SGPR_32_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64"
|
||||||
|
|
||||||
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
|
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
|
||||||
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
|
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
|
||||||
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
|
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
|
||||||
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
|
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
|
||||||
|
|
||||||
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
|
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
|
||||||
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
|
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
|
||||||
|
|
||||||
// CHECK-DAG: attributes [[A_FUNCTION]] = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false"
|
// CHECK-DAG: attributes [[A_FUNCTION]] = {{.*}} "correctly-rounded-divide-sqrt-fp-math"="false"
|
||||||
// CHECK-DAG: attributes [[DEFAULT_KERNEL_ATTRS]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56"
|
// CHECK-DAG: attributes [[DEFAULT_KERNEL_ATTRS]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56"
|
||||||
|
|
|
@ -0,0 +1,6 @@
|
||||||
|
// RUN: %clang_cc1 -O0 -emit-llvm -o - %s | FileCheck %s
|
||||||
|
|
||||||
|
kernel void kernel1(int a) {}
|
||||||
|
// CHECK: define{{.*}}@kernel1{{.*}}#[[ATTR:[0-9]*]]
|
||||||
|
|
||||||
|
// CHECK: attributes #[[ATTR]] = {{.*}}norecurse
|
|
@ -0,0 +1,9 @@
|
||||||
|
// RUN: %clang_cc1 %s --std=c++11 -triple x86_64-unknown-linux -emit-llvm -o - \
|
||||||
|
// RUN: -verify -fsyntax-only -verify-ignore-unexpected=note
|
||||||
|
|
||||||
|
#include "Inputs/cuda.h"
|
||||||
|
|
||||||
|
__global__ void kernel1();
|
||||||
|
__global__ void kernel2() {
|
||||||
|
kernel1<<<1,1>>>(); // expected-error {{reference to __global__ function 'kernel1' in __global__ function}}
|
||||||
|
}
|
Loading…
Reference in New Issue