llvm-project/clang/test/OpenMP/nvptx_target_teams_distribu...

// Test target codegen - host bc file has to be created first.
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32

// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32

// expected-no-diagnostics
#ifndef HEADER
#define HEADER

// Check that the execution mode of all 4 target regions on the gpu is set to SPMD Mode.
// CHECK-DAG: {{@__omp_offloading_.+l37}}_exec_mode = weak constant i8 0
// CHECK-DAG: {{@__omp_offloading_.+l43}}_exec_mode = weak constant i8 0
// CHECK-DAG: {{@__omp_offloading_.+l48}}_exec_mode = weak constant i8 0
// CHECK-DAG: {{@__omp_offloading_.+l53}}_exec_mode = weak constant i8 0

#define N 1000 // element count of the arrays a and aa declared in ftemplate
#define M 10 // extent of c (M x M), bound of the collapsed loop nest, and num_teams argument in ftemplate

template<typename tx> // Fixture: four offloaded loops whose device IR is matched by the FileCheck comments in this file.
tx ftemplate(int n) { // NOTE: matched kernel names end in each pragma's line number (_l37, _l43, _l48, _l53); keep the line count above every pragma unchanged.
  tx a[N];      // assigned in region 1, listed in region 3's map clause; a[0] is the return value
  short aa[N];  // incremented in region 2 (map(tofrom: aa)), also listed in region 3's map clause
  tx b[10];     // incremented in region 3
  tx c[M][M];   // filled by the collapsed loop nest of region 4
  tx f = n;     // firstprivate in region 4
  tx l;         // lastprivate in region 1
  int k;        // private in region 4
  // Region 1: lastprivate(l) with dist_schedule(static,128); the trip count is the runtime value n.
#pragma omp target teams distribute simd lastprivate(l) dist_schedule(static,128)
  for(int i = 0; i < n; i++) {
    a[i] = 1;
    l = i;
  }
  // Region 2: explicit map of aa together with num_teams(M) and thread_limit(64) clauses.
  #pragma omp target teams distribute simd map(tofrom: aa) num_teams(M) thread_limit(64)
  for(int i = 0; i < n; i++) {
    aa[i] += 1;
  }
  // Region 3: carries an if(target: n>40) clause and maps a, aa and b; fixed trip count of 10.
#pragma omp target teams distribute simd map(tofrom:a, aa, b) if(target: n>40)
  for(int i = 0; i < 10; i++) {
    b[i] += 1;
  }
  // Region 4: collapse(2) over the M x M nest; the patterns at the end of the file expect a combined upper bound of 99.
#pragma omp target teams distribute simd collapse(2) firstprivate(f) private(k)
  for(int i = 0; i < M; i++) {
    for(int j = 0; j < M; j++) {
      k = M;
      c[i][j] = i + j * f + k;
    }
  }

  return a[0];
}

// Instantiates ftemplate with tx = int for the given n and forwards the
// value that call returns.
int bar(int n){
  const int result = ftemplate<int>(n);
  return result;
}

// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l37(
// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 0)
// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)

// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
// CHECK: call void @__kmpc_for_static_fini(
// CHECK: ret void

// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l43(
// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 0)
// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)

// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
// CHECK: call void @__kmpc_for_static_fini(
// CHECK: ret void

// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l48(
// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 0)
// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)

// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
// CHECK: call void @__kmpc_for_static_fini(
// CHECK: ret void

// CHECK: define {{.*}}void {{@__omp_offloading_.+}}_l53({{.+}}, i{{32|64}} [[F_IN:%.+]])
// CHECK: store {{.+}} [[F_IN]], {{.+}}* {{.+}},
// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 0)
// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)

// CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]],
// CHECK: call void @__kmpc_for_static_fini(
// CHECK: ret void

#endif
[OpenMP] Extend NVPTX SPMD implementation of combined constructs Differential Revision: https://reviews.llvm.org/D43852 This patch extends the SPMD implementation to all target constructs and guards this implementation under a new flag. llvm-svn: 326368 2018-03-01 04:48:35 +08:00			`// Test target codegen - host bc file has to be created first.`
			`// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc`
			`// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - \| FileCheck %s --check-prefix CHECK --check-prefix CHECK-64`
			`// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc`
			`// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - \| FileCheck %s --check-prefix CHECK --check-prefix CHECK-32`
			`// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - \| FileCheck %s --check-prefix CHECK --check-prefix CHECK-32`
[OpenMP] Ensure testing for versions 4.5 and default - Part 2 Many OpenMP Clang tests do not RUN for version 4.5 and the default version. This second patch in the series handles test cases which require updation in CHECK lines along with adding RUN lines for the default version. It involves updating line number of pragmas. Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D85150 2020-08-28 02:50:34 +08:00
			`// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc`
			`// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - \| FileCheck %s --check-prefix CHECK --check-prefix CHECK-64`
			`// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc`
			`// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - \| FileCheck %s --check-prefix CHECK --check-prefix CHECK-32`
			`// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - \| FileCheck %s --check-prefix CHECK --check-prefix CHECK-32`

[OpenMP] Extend NVPTX SPMD implementation of combined constructs Differential Revision: https://reviews.llvm.org/D43852 This patch extends the SPMD implementation to all target constructs and guards this implementation under a new flag. llvm-svn: 326368 2018-03-01 04:48:35 +08:00			`// expected-no-diagnostics`
			`#ifndef HEADER`
			`#define HEADER`

[OPENMP, NVPTX] Do not use SPMD mode for target simd and target teams distribute simd directives. Directives `target simd` and `target teams distribute simd` must be executed in non-SPMD mode. llvm-svn: 332129 2018-05-12 03:45:14 +08:00			`// Check that the execution mode of all 2 target regions on the gpu is set to NonSPMD Mode.`
[OpenMP] Ensure testing for versions 4.5 and default - Part 2 Many OpenMP Clang tests do not RUN for version 4.5 and the default version. This second patch in the series handles test cases which require updation in CHECK lines along with adding RUN lines for the default version. It involves updating line number of pragmas. Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D85150 2020-08-28 02:50:34 +08:00			`// CHECK-DAG: {{@__omp_offloading_.+l37}}_exec_mode = weak constant i8 0`
			`// CHECK-DAG: {{@__omp_offloading_.+l43}}_exec_mode = weak constant i8 0`
			`// CHECK-DAG: {{@__omp_offloading_.+l48}}_exec_mode = weak constant i8 0`
			`// CHECK-DAG: {{@__omp_offloading_.+l53}}_exec_mode = weak constant i8 0`
[OpenMP] Extend NVPTX SPMD implementation of combined constructs Differential Revision: https://reviews.llvm.org/D43852 This patch extends the SPMD implementation to all target constructs and guards this implementation under a new flag. llvm-svn: 326368 2018-03-01 04:48:35 +08:00
			`#define N 1000`
			`#define M 10`

			`template<typename tx>`
			`tx ftemplate(int n) {`
			`tx a[N];`
			`short aa[N];`
			`tx b[10];`
[OPENMP][NVPTX]Run combined constructs with if clause in SPMD mode. All target-parallel-based constructs can be run in SPMD mode from now on. Even if num_threads clauses or if clauses are used, such constructs can be executed in SPMD mode. llvm-svn: 358595 2019-04-18 00:53:08 +08:00			`tx c[M][M];`
[OpenMP] Extend NVPTX SPMD implementation of combined constructs Differential Revision: https://reviews.llvm.org/D43852 This patch extends the SPMD implementation to all target constructs and guards this implementation under a new flag. llvm-svn: 326368 2018-03-01 04:48:35 +08:00			`tx f = n;`
			`tx l;`
			`int k;`

			`#pragma omp target teams distribute simd lastprivate(l) dist_schedule(static,128)`
			`for(int i = 0; i < n; i++) {`
			`a[i] = 1;`
			`l = i;`
			`}`

			`#pragma omp target teams distribute simd map(tofrom: aa) num_teams(M) thread_limit(64)`
			`for(int i = 0; i < n; i++) {`
			`aa[i] += 1;`
			`}`

			`#pragma omp target teams distribute simd map(tofrom:a, aa, b) if(target: n>40)`
			`for(int i = 0; i < 10; i++) {`
			`b[i] += 1;`
			`}`

			`#pragma omp target teams distribute simd collapse(2) firstprivate(f) private(k)`
			`for(int i = 0; i < M; i++) {`
			`for(int j = 0; j < M; j++) {`
			`k = M;`
[OPENMP][NVPTX]Run combined constructs with if clause in SPMD mode. All target-parallel-based constructs can be run in SPMD mode from now on. Even if num_threads clauses or if clauses are used, such constructs can be executed in SPMD mode. llvm-svn: 358595 2019-04-18 00:53:08 +08:00			`c[i][j] = i + j * f + k;`
[OpenMP] Extend NVPTX SPMD implementation of combined constructs Differential Revision: https://reviews.llvm.org/D43852 This patch extends the SPMD implementation to all target constructs and guards this implementation under a new flag. llvm-svn: 326368 2018-03-01 04:48:35 +08:00			`}`
			`}`

			`return a[0];`
			`}`

			`int bar(int n){`
			`int a = 0;`

			`a += ftemplate<int>(n);`

			`return a;`
			`}`

[OpenMP] Ensure testing for versions 4.5 and default - Part 2 Many OpenMP Clang tests do not RUN for version 4.5 and the default version. This second patch in the series handles test cases which require updation in CHECK lines along with adding RUN lines for the default version. It involves updating line number of pragmas. Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D85150 2020-08-28 02:50:34 +08:00			`// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l37(`
[OpenMP][RTL] Remove dead code RequiresDataSharing was always 0, resulting dead code in device runtime library. Reviewed By: jdoerfert, JonChesterfield Differential Revision: https://reviews.llvm.org/D88829 2020-10-05 20:59:26 +08:00			`// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 0)`
[OPENMP][NVPTX]Mark more functions as always_inline for better performance. Internally generated functions must be marked as always_inlines in most cases. Patch marks some extra reduction function + outlined parallel functions as always_inline for better performance, but only if the optimization is requested. llvm-svn: 361269 2019-05-21 23:11:58 +08:00			`// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)`

[OpenMP] Extend NVPTX SPMD implementation of combined constructs Differential Revision: https://reviews.llvm.org/D43852 This patch extends the SPMD implementation to all target constructs and guards this implementation under a new flag. llvm-svn: 326368 2018-03-01 04:48:35 +08:00			`// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,`
			`// CHECK: call void @__kmpc_for_static_fini(`
			`// CHECK: ret void`

[OpenMP] Ensure testing for versions 4.5 and default - Part 2 Many OpenMP Clang tests do not RUN for version 4.5 and the default version. This second patch in the series handles test cases which require updation in CHECK lines along with adding RUN lines for the default version. It involves updating line number of pragmas. Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D85150 2020-08-28 02:50:34 +08:00			`// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l43(`
[OpenMP][RTL] Remove dead code RequiresDataSharing was always 0, resulting dead code in device runtime library. Reviewed By: jdoerfert, JonChesterfield Differential Revision: https://reviews.llvm.org/D88829 2020-10-05 20:59:26 +08:00			`// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 0)`
[OPENMP][NVPTX]Mark more functions as always_inline for better performance. Internally generated functions must be marked as always_inlines in most cases. Patch marks some extra reduction function + outlined parallel functions as always_inline for better performance, but only if the optimization is requested. llvm-svn: 361269 2019-05-21 23:11:58 +08:00			`// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)`

[OPENMP][NVPTX]Run combined constructs with if clause in SPMD mode. All target-parallel-based constructs can be run in SPMD mode from now on. Even if num_threads clauses or if clauses are used, such constructs can be executed in SPMD mode. llvm-svn: 358595 2019-04-18 00:53:08 +08:00			`// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,`
[OpenMP] Extend NVPTX SPMD implementation of combined constructs Differential Revision: https://reviews.llvm.org/D43852 This patch extends the SPMD implementation to all target constructs and guards this implementation under a new flag. llvm-svn: 326368 2018-03-01 04:48:35 +08:00			`// CHECK: call void @__kmpc_for_static_fini(`
			`// CHECK: ret void`

[OpenMP] Ensure testing for versions 4.5 and default - Part 2 Many OpenMP Clang tests do not RUN for version 4.5 and the default version. This second patch in the series handles test cases which require updation in CHECK lines along with adding RUN lines for the default version. It involves updating line number of pragmas. Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D85150 2020-08-28 02:50:34 +08:00			`// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l48(`
[OpenMP][RTL] Remove dead code RequiresDataSharing was always 0, resulting dead code in device runtime library. Reviewed By: jdoerfert, JonChesterfield Differential Revision: https://reviews.llvm.org/D88829 2020-10-05 20:59:26 +08:00			`// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 0)`
[OPENMP][NVPTX]Mark more functions as always_inline for better performance. Internally generated functions must be marked as always_inlines in most cases. Patch marks some extra reduction function + outlined parallel functions as always_inline for better performance, but only if the optimization is requested. llvm-svn: 361269 2019-05-21 23:11:58 +08:00			`// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)`

[OPENMP][NVPTX]Run combined constructs with if clause in SPMD mode. All target-parallel-based constructs can be run in SPMD mode from now on. Even if num_threads clauses or if clauses are used, such constructs can be executed in SPMD mode. llvm-svn: 358595 2019-04-18 00:53:08 +08:00			`// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,`
[OpenMP] Extend NVPTX SPMD implementation of combined constructs Differential Revision: https://reviews.llvm.org/D43852 This patch extends the SPMD implementation to all target constructs and guards this implementation under a new flag. llvm-svn: 326368 2018-03-01 04:48:35 +08:00			`// CHECK: call void @__kmpc_for_static_fini(`
			`// CHECK: ret void`

[OpenMP] Ensure testing for versions 4.5 and default - Part 2 Many OpenMP Clang tests do not RUN for version 4.5 and the default version. This second patch in the series handles test cases which require updation in CHECK lines along with adding RUN lines for the default version. It involves updating line number of pragmas. Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D85150 2020-08-28 02:50:34 +08:00			`// CHECK: define {{.*}}void {{@__omp_offloading_.+}}_l53({{.+}}, i{{32\|64}} [[F_IN:%.+]])`
[OpenMP] Extend NVPTX SPMD implementation of combined constructs Differential Revision: https://reviews.llvm.org/D43852 This patch extends the SPMD implementation to all target constructs and guards this implementation under a new flag. llvm-svn: 326368 2018-03-01 04:48:35 +08:00			`// CHECK: store {{.+}} [[F_IN]], {{.+}}* {{.+}},`
[OpenMP][RTL] Remove dead code RequiresDataSharing was always 0, resulting dead code in device runtime library. Reviewed By: jdoerfert, JonChesterfield Differential Revision: https://reviews.llvm.org/D88829 2020-10-05 20:59:26 +08:00			`// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 0)`
[OPENMP][NVPTX]Mark more functions as always_inline for better performance. Internally generated functions must be marked as always_inlines in most cases. Patch marks some extra reduction function + outlined parallel functions as always_inline for better performance, but only if the optimization is requested. llvm-svn: 361269 2019-05-21 23:11:58 +08:00			`// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)`

[OpenMP] Extend NVPTX SPMD implementation of combined constructs Differential Revision: https://reviews.llvm.org/D43852 This patch extends the SPMD implementation to all target constructs and guards this implementation under a new flag. llvm-svn: 326368 2018-03-01 04:48:35 +08:00			`// CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align`
[OPENMP][NVPTX]Run combined constructs with if clause in SPMD mode. All target-parallel-based constructs can be run in SPMD mode from now on. Even if num_threads clauses or if clauses are used, such constructs can be executed in SPMD mode. llvm-svn: 358595 2019-04-18 00:53:08 +08:00			`// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]],`
[OpenMP] Extend NVPTX SPMD implementation of combined constructs Differential Revision: https://reviews.llvm.org/D43852 This patch extends the SPMD implementation to all target constructs and guards this implementation under a new flag. llvm-svn: 326368 2018-03-01 04:48:35 +08:00			`// CHECK: call void @__kmpc_for_static_fini(`
			`// CHECK: ret void`

			`#endif`