llvm-project/clang/test/OpenMP/nvptx_allocate_codegen.cpp

// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-apple-darwin10.6.0 -fopenmp-targets=nvptx64-nvidia-cuda  -emit-llvm-bc -o %t-host.bc %s
// RUN: %clang_cc1 -verify -fopenmp -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - -disable-llvm-optzns | FileCheck %s
// expected-no-diagnostics

#ifndef HEADER
#define HEADER

// Opens the declare-target region that is closed by the matching
// "end declare target" pragma at the bottom of this file.
#pragma omp declare target
// Stand-in declarations for the allocator handle type and the predefined
// allocators named by the allocate directives/clauses below. The test declares
// them itself instead of including a header.
typedef void **omp_allocator_handle_t;
extern const omp_allocator_handle_t omp_default_mem_alloc;
extern const omp_allocator_handle_t omp_large_cap_mem_alloc;
extern const omp_allocator_handle_t omp_const_mem_alloc;
extern const omp_allocator_handle_t omp_high_bw_mem_alloc;
extern const omp_allocator_handle_t omp_low_lat_mem_alloc;
extern const omp_allocator_handle_t omp_cgroup_mem_alloc;
extern const omp_allocator_handle_t omp_pteam_mem_alloc;
extern const omp_allocator_handle_t omp_thread_mem_alloc;

// CHECK-DAG: @{{.+}}St1{{.+}}b{{.+}} = external global i32,
// CHECK-DAG: @a ={{ hidden | }}global i32 0,
// CHECK-DAG: @b ={{ hidden | }}addrspace(4) global i32 0,
// CHECK-DAG: @c ={{ hidden | }}global i32 0,
// CHECK-DAG: @d ={{ hidden | }}global %struct.St1 zeroinitializer,
// CHECK-DAG: @{{.+}}ns{{.+}}a{{.+}} ={{ hidden | }}addrspace(3) global i32 0,
// CHECK-DAG: @{{.+}}main{{.+}}a{{.*}} = internal global i32 0,
// CHECK-DAG: @{{.+}}ST{{.+}}m{{.+}} = external global i32,
// CHECK-DAG: @bar_c = internal global i32 0,
// CHECK-DAG: @bar_b = internal addrspace(3) global double 0.000000e+00,
// Plain aggregate with a single int member. Nothing else in the visible part
// of this test refers to it.
struct St{
 int a;
};

// St1 carries a static data member `b` that is only declared (no definition
// is visible in this file) and is named in an allocate directive with
// omp_default_mem_alloc inside the class body. The FileCheck directives near
// the top of the file look for it as an "external global i32".
// `d` is a namespace-scope object of this type; it receives its allocator in
// a separate directive further down.
struct St1{
 int a;
 static int b;
#pragma omp allocate(b) allocator(omp_default_mem_alloc)
} d;

// Namespace-scope variables, each bound to a predefined allocator by the
// directives that follow. According to the FileCheck directives near the top
// of the file, among these only `b` (omp_const_mem_alloc) is matched in a
// non-default address space, addrspace(4); `a`, `c` and `d` are matched as
// ordinary globals.
int a, b, c;
#pragma omp allocate(a) allocator(omp_large_cap_mem_alloc)
#pragma omp allocate(b) allocator(omp_const_mem_alloc)
// A single directive may list several variables; here it covers `d` and `c`.
#pragma omp allocate(d, c) allocator(omp_high_bw_mem_alloc)

// Class template whose static data member `m` is tied to
// omp_low_lat_mem_alloc by an allocate directive written inside the class
// body. `m` is declared here but not defined anywhere in this file.
template <class T>
struct ST {
  static T m;
  #pragma omp allocate(m) allocator(omp_low_lat_mem_alloc)
};

// Declares a local `v` under omp_cgroup_mem_alloc, copies ST<T>::m into it
// and returns it. For the int instantiation used by main, the FileCheck
// directives placed after main forbid an "alloca i32" following this
// function's define line.
template <class T> T foo() {
  T v;
  #pragma omp allocate(v) allocator(omp_cgroup_mem_alloc)
  v = ST<T>::m;
  return v;
}

// ns::a receives omp_pteam_mem_alloc through a directive placed outside the
// namespace that refers to it by qualified name. The FileCheck directives
// near the top of the file match it in addrspace(3).
namespace ns{
  int a;
}
#pragma omp allocate(ns::a) allocator(omp_pteam_mem_alloc)

// CHECK-LABEL: @main
// Exercises allocate directives on function-local variables: a static local
// (omp_thread_mem_alloc), a double (omp_default_mem_alloc) and a float
// (omp_cgroup_mem_alloc). It also contains the only call to foo<int>.
int main () {
  // CHECK: alloca double,
  // The static local is matched near the top of the file as an internal
  // i32 global whose name contains "main" and "a".
  static int a;
#pragma omp allocate(a) allocator(omp_thread_mem_alloc)
  a=2;
  // `b` is the only double declared in main, so it is presumably the variable
  // behind the "alloca double" directive above.
  double b = 3;
  // No directive in this function pins how `c` is emitted.
  float c;
#pragma omp allocate(b) allocator(omp_default_mem_alloc)
#pragma omp allocate(c) allocator(omp_cgroup_mem_alloc)
  return (foo<int>());
}

// CHECK: define {{.*}}i32 @{{.+}}foo{{.+}}()
// CHECK-NOT: alloca i32,

// Explicit instantiation declaration for the static member of ST<int>. The
// FileCheck directives near the top of the file match it as an
// "external global i32".
extern template int ST<int>::m;

// Declared only, never defined in this file; bar() passes its private float
// to it by reference.
void baz(float &);

// CHECK: define{{ hidden | }}void @{{.+}}bar{{.+}}()
// Covers allocate clauses on a parallel construct together with an allocate
// directive on a plain local variable.
void bar() {
  // CHECK: alloca float,
  float bar_a;
  // CHECK: alloca double,
  double bar_b;
  // bar_c is never read or written; it exists only to carry the directive
  // below. The FileCheck directives near the top of the file match it as the
  // internal global @bar_c.
  int bar_c;
#pragma omp allocate(bar_c) allocator(omp_cgroup_mem_alloc)
  // CHECK: call void [[OUTLINED:@.+]](i32* %{{.+}}, i32* %{{.+}})
  // bar_a and bar_b are privatized for the parallel region, each with its own
  // allocate clause: omp_thread_mem_alloc for bar_a, omp_pteam_mem_alloc for
  // bar_b.
#pragma omp parallel private(bar_a, bar_b) allocate(omp_thread_mem_alloc                  \
                                                    : bar_a) allocate(omp_pteam_mem_alloc \
                                                                      : bar_b)
  {
    // Touch both private copies so that a float load and a double store show
    // up in the outlined function, which the directives below look for.
    bar_b = bar_a;
    baz(bar_a);
  }
  // The directives below inspect the outlined parallel function: it must
  // contain an "alloca float" but no "alloca double", and the double store
  // must target the addrspace(3) global @bar_b.
// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}})
// CHECK-NOT: alloca double,
// CHECK: alloca float,
// CHECK-NOT: alloca double,
// CHECK: load float, float* %
// CHECK: store double {{.+}}, double addrspace(3)* @bar_b,
}

#pragma omp end declare target
#endif
[OPENMP]Codegen support for allocate directive on global variables. For the global variables the allocate directive must specify only the predefined allocator. This allocator must be translated into the correct form of the address space for the targets that support different address spaces. llvm-svn: 356702 2019-03-22 03:35:27 +08:00			`// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-apple-darwin10.6.0 -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc -o %t-host.bc %s`
[OPENMP][NVPTX]Mark more functions as always_inline for better performance. Internally generated functions must be marked as always_inline in most cases. The patch marks some extra reduction functions and outlined parallel functions as always_inline for better performance, but only if the optimization is requested. llvm-svn: 361269 2019-05-21 23:11:58 +08:00			`// RUN: %clang_cc1 -verify -fopenmp -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - -disable-llvm-optzns \| FileCheck %s`
[OPENMP]Codegen support for allocate directive on global variables. For the global variables the allocate directive must specify only the predefined allocator. This allocator must be translated into the correct form of the address space for the targets that support different address spaces. llvm-svn: 356702 2019-03-22 03:35:27 +08:00			`// expected-no-diagnostics`

			`#ifndef HEADER`
			`#define HEADER`

			`#pragma omp declare target`
			`typedef void **omp_allocator_handle_t;`
			`extern const omp_allocator_handle_t omp_default_mem_alloc;`
			`extern const omp_allocator_handle_t omp_large_cap_mem_alloc;`
			`extern const omp_allocator_handle_t omp_const_mem_alloc;`
			`extern const omp_allocator_handle_t omp_high_bw_mem_alloc;`
			`extern const omp_allocator_handle_t omp_low_lat_mem_alloc;`
			`extern const omp_allocator_handle_t omp_cgroup_mem_alloc;`
			`extern const omp_allocator_handle_t omp_pteam_mem_alloc;`
			`extern const omp_allocator_handle_t omp_thread_mem_alloc;`

			`// CHECK-DAG: @{{.+}}St1{{.+}}b{{.+}} = external global i32,`
[OPENMP]Fix PR41826: symbols visibility in device code. Summary: Currently, we ignore all locality attributes/info when building for the device and thus all symbols are externally visible and can be preempted at the runtime. It may lead to incorrect results. We need to follow the same logic the compiler uses for static/pie builds. But in some cases changing of dso locality may lead to problems with codegen, so instead mark external symbols as hidden in the device code. Reviewers: jdoerfert Subscribers: guansong, caomhin, kkwli0, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D70549 2019-11-22 00:30:43 +08:00			`// CHECK-DAG: @a ={{ hidden \| }}global i32 0,`
			`// CHECK-DAG: @b ={{ hidden \| }}addrspace(4) global i32 0,`
			`// CHECK-DAG: @c ={{ hidden \| }}global i32 0,`
			`// CHECK-DAG: @d ={{ hidden \| }}global %struct.St1 zeroinitializer,`
			`// CHECK-DAG: @{{.+}}ns{{.+}}a{{.+}} ={{ hidden \| }}addrspace(3) global i32 0,`
[OPENMP]Codegen support for allocate directive on global variables. For the global variables the allocate directive must specify only the predefined allocator. This allocator must be translated into the correct form of the address space for the targets that support different address spaces. llvm-svn: 356702 2019-03-22 03:35:27 +08:00			`// CHECK-DAG: @{{.+}}main{{.+}}a{{.*}} = internal global i32 0,`
			`// CHECK-DAG: @{{.+}}ST{{.+}}m{{.+}} = external global i32,`
[OPENMP][NVPTX]Fixed processing of memory management directives. Added special processing of the memory management directives/clauses for NVPTX target. For private locals, omp_default_mem_alloc and omp_thread_mem_alloc result in allocation in local memory. omp_const_mem_alloc allocates const memory, omp_teams_mem_alloc allocates shared memory, and omp_cgroup_mem_alloc and omp_large_cap_mem_alloc allocate global memory. llvm-svn: 357923 2019-04-09 00:53:57 +08:00			`// CHECK-DAG: @bar_c = internal global i32 0,`
			`// CHECK-DAG: @bar_b = internal addrspace(3) global double 0.000000e+00,`
[OPENMP]Codegen support for allocate directive on global variables. For the global variables the allocate directive must specify only the predefined allocator. This allocator must be translated into the correct form of the address space for the targets that support different address spaces. llvm-svn: 356702 2019-03-22 03:35:27 +08:00			`struct St{`
			`int a;`
			`};`

			`struct St1{`
			`int a;`
			`static int b;`
			`#pragma omp allocate(b) allocator(omp_default_mem_alloc)`
			`} d;`

			`int a, b, c;`
			`#pragma omp allocate(a) allocator(omp_large_cap_mem_alloc)`
			`#pragma omp allocate(b) allocator(omp_const_mem_alloc)`
			`#pragma omp allocate(d, c) allocator(omp_high_bw_mem_alloc)`

			`template <class T>`
			`struct ST {`
			`static T m;`
			`#pragma omp allocate(m) allocator(omp_low_lat_mem_alloc)`
			`};`

			`template <class T> T foo() {`
			`T v;`
			`#pragma omp allocate(v) allocator(omp_cgroup_mem_alloc)`
			`v = ST<T>::m;`
			`return v;`
			`}`

			`namespace ns{`
			`int a;`
			`}`
			`#pragma omp allocate(ns::a) allocator(omp_pteam_mem_alloc)`

[OPENMP] Simplify codegen for allocate directive on local variables. Simplified codegen for the allocate directive for local variables, initial implementation of the codegen for NVPTX target. llvm-svn: 356710 2019-03-22 04:36:16 +08:00			`// CHECK-LABEL: @main`
[OPENMP]Codegen support for allocate directive on global variables. For the global variables the allocate directive must specify only the predefined allocator. This allocator must be translated into the correct form of the address space for the targets that support different address spaces. llvm-svn: 356702 2019-03-22 03:35:27 +08:00			`int main () {`
[OPENMP] Simplify codegen for allocate directive on local variables. Simplified codegen for the allocate directive for local variables, initial implementation of the codegen for NVPTX target. llvm-svn: 356710 2019-03-22 04:36:16 +08:00			`// CHECK: alloca double,`
[OPENMP]Codegen support for allocate directive on global variables. For the global variables the allocate directive must specify only the predefined allocator. This allocator must be translated into the correct form of the address space for the targets that support different address spaces. llvm-svn: 356702 2019-03-22 03:35:27 +08:00			`static int a;`
			`#pragma omp allocate(a) allocator(omp_thread_mem_alloc)`
			`a=2;`
			`double b = 3;`
[OPENMP][NVPTX]Fixed processing of memory management directives. Added special processing of the memory management directives/clauses for NVPTX target. For private locals, omp_default_mem_alloc and omp_thread_mem_alloc result in allocation in local memory. omp_const_mem_alloc allocates const memory, omp_teams_mem_alloc allocates shared memory, and omp_cgroup_mem_alloc and omp_large_cap_mem_alloc allocate global memory. llvm-svn: 357923 2019-04-09 00:53:57 +08:00			`float c;`
[OPENMP]Emit error message for allocate directive without allocator clause in target region. According to the OpenMP 5.0, 2.11.3 allocate Directive, Restrictions, allocate directives that appear in a target region must specify an allocator clause unless a requires directive with the dynamic_allocators clause is present in the same compilation unit. llvm-svn: 356752 2019-03-22 22:41:39 +08:00			`#pragma omp allocate(b) allocator(omp_default_mem_alloc)`
[OPENMP][NVPTX]Fixed processing of memory management directives. Added special processing of the memory management directives/clauses for NVPTX target. For private locals, omp_default_mem_alloc and omp_thread_mem_alloc result in allocation in local memory. omp_const_mem_alloc allocates const memory, omp_teams_mem_alloc allocates shared memory, and omp_cgroup_mem_alloc and omp_large_cap_mem_alloc allocate global memory. llvm-svn: 357923 2019-04-09 00:53:57 +08:00			`#pragma omp allocate(c) allocator(omp_cgroup_mem_alloc)`
[OPENMP]Codegen support for allocate directive on global variables. For the global variables the allocate directive must specify only the predefined allocator. This allocator must be translated into the correct form of the address space for the targets that support different address spaces. llvm-svn: 356702 2019-03-22 03:35:27 +08:00			`return (foo<int>());`
			`}`

[OPENMP] Simplify codegen for allocate directive on local variables. Simplified codegen for the allocate directive for local variables, initial implementation of the codegen for NVPTX target. llvm-svn: 356710 2019-03-22 04:36:16 +08:00			`// CHECK: define {{.*}}i32 @{{.+}}foo{{.+}}()`
[OPENMP][NVPTX]Fixed processing of memory management directives. Added special processing of the memory management directives/clauses for NVPTX target. For private locals, omp_default_mem_alloc and omp_thread_mem_alloc result in allocation in local memory. omp_const_mem_alloc allocates const memory, omp_teams_mem_alloc allocates shared memory, and omp_cgroup_mem_alloc and omp_large_cap_mem_alloc allocate global memory. llvm-svn: 357923 2019-04-09 00:53:57 +08:00			`// CHECK-NOT: alloca i32,`
[OPENMP] Simplify codegen for allocate directive on local variables. Simplified codegen for the allocate directive for local variables, initial implementation of the codegen for NVPTX target. llvm-svn: 356710 2019-03-22 04:36:16 +08:00
[OPENMP]Codegen support for allocate directive on global variables. For the global variables the allocate directive must specify only the predefined allocator. This allocator must be translated into the correct form of the address space for the targets that support different address spaces. llvm-svn: 356702 2019-03-22 03:35:27 +08:00			`extern template int ST<int>::m;`
[OPENMP][NVPTX]Fixed processing of memory management directives. Added special processing of the memory management directives/clauses for NVPTX target. For private locals, omp_default_mem_alloc and omp_thread_mem_alloc result in allocation in local memory. omp_const_mem_alloc allocates const memory, omp_teams_mem_alloc allocates shared memory, and omp_cgroup_mem_alloc and omp_large_cap_mem_alloc allocate global memory. llvm-svn: 357923 2019-04-09 00:53:57 +08:00
			`void baz(float &);`

[OPENMP]Fix PR41826: symbols visibility in device code. Summary: Currently, we ignore all locality attributes/info when building for the device and thus all symbols are externally visible and can be preempted at the runtime. It may lead to incorrect results. We need to follow the same logic the compiler uses for static/pie builds. But in some cases changing of dso locality may lead to problems with codegen, so instead mark external symbols as hidden in the device code. Reviewers: jdoerfert Subscribers: guansong, caomhin, kkwli0, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D70549 2019-11-22 00:30:43 +08:00			`// CHECK: define{{ hidden \| }}void @{{.+}}bar{{.+}}()`
[OPENMP][NVPTX]Fixed processing of memory management directives. Added special processing of the memory management directives/clauses for NVPTX target. For private locals, omp_default_mem_alloc and omp_thread_mem_alloc result in allocation in local memory. omp_const_mem_alloc allocates const memory, omp_teams_mem_alloc allocates shared memory, and omp_cgroup_mem_alloc and omp_large_cap_mem_alloc allocate global memory. llvm-svn: 357923 2019-04-09 00:53:57 +08:00			`void bar() {`
			`// CHECK: alloca float,`
			`float bar_a;`
			`// CHECK: alloca double,`
			`double bar_b;`
			`int bar_c;`
			`#pragma omp allocate(bar_c) allocator(omp_cgroup_mem_alloc)`
			`// CHECK: call void [[OUTLINED:@.+]](i32* %{{.+}}, i32* %{{.+}})`
			`#pragma omp parallel private(bar_a, bar_b) allocate(omp_thread_mem_alloc \`
			`: bar_a) allocate(omp_pteam_mem_alloc \`
			`: bar_b)`
			`{`
			`bar_b = bar_a;`
			`baz(bar_a);`
			`}`
			`// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}})`
			`// CHECK-NOT: alloca double,`
			`// CHECK: alloca float,`
			`// CHECK-NOT: alloca double,`
			`// CHECK: load float, float* %`
			`// CHECK: store double {{.+}}, double addrspace(3)* @bar_b,`
			`}`

[OPENMP]Codegen support for allocate directive on global variables. For the global variables the allocate directive must specify only the predefined allocator. This allocator must be translated into the correct form of the address space for the targets that support different address spaces. llvm-svn: 356702 2019-03-22 03:35:27 +08:00			`#pragma omp end declare target`
			`#endif`