From 1472e32cd732393f811c382247c0ae7f77212a92 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Fri, 19 Apr 2019 16:48:38 +0000 Subject: [PATCH] [OPENMP][NVPTX] target [teams distribute] simd may be run without runtime. target [teams distribute] simd constructs do not require the full runtime for correct execution, so we can run them without the full runtime. llvm-svn: 358766 --- clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 9 ++++++++- clang/test/OpenMP/nvptx_target_simd_codegen.cpp | 16 ++++++++-------- ...vptx_target_teams_distribute_simd_codegen.cpp | 16 ++++++++-------- 3 files changed, 24 insertions(+), 17 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 16f6c125fe06..f4b044744fed 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -907,6 +907,8 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx, isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NestedDir)) return true; + if (DKind == OMPD_teams_distribute_simd || DKind == OMPD_simd) + return true; if (DKind == OMPD_parallel) { Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( /*IgnoreCaptured=*/true); @@ -955,6 +957,8 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx, isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NestedDir)) return true; + if (DKind == OMPD_distribute_simd || DKind == OMPD_simd) + return true; if (DKind == OMPD_parallel) { Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( /*IgnoreCaptured=*/true); @@ -971,6 +975,8 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx, } return false; case OMPD_target_parallel: + if (DKind == OMPD_simd) + return true; return isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NestedDir); case OMPD_target_teams_distribute: @@ -1052,8 +1058,9 @@ static bool 
supportsLightweightRuntime(ASTContext &Ctx, // (Last|First)-privates must be shared in parallel region. return hasStaticScheduling(D); case OMPD_target_simd: - case OMPD_target_teams_distribute: case OMPD_target_teams_distribute_simd: + return true; + case OMPD_target_teams_distribute: return false; case OMPD_parallel: case OMPD_for: diff --git a/clang/test/OpenMP/nvptx_target_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_simd_codegen.cpp index ce4a4238f812..073d6fa2f14e 100644 --- a/clang/test/OpenMP/nvptx_target_simd_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_simd_codegen.cpp @@ -54,33 +54,33 @@ int bar(int n){ } // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l25}}( -// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 1, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 0, i16 0) // CHECK-NOT: call void @__kmpc_for_static_init // CHECK-NOT: call void @__kmpc_for_static_fini -// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 1) +// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) // CHECK: ret void // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l30}}( -// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 1, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 0, i16 0) // CHECK-NOT: call void @__kmpc_for_static_init // CHECK-NOT: call void @__kmpc_for_static_fini -// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 1) +// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) // CHECK: ret void // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l35}}( -// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 1, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 0, i16 0) // CHECK-NOT: call void @__kmpc_for_static_init // CHECK-NOT: call void @__kmpc_for_static_fini -// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 1) +// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) // CHECK: ret void // CHECK-LABEL: define {{.*}}void 
{{@__omp_offloading_.+l40}}( -// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 1, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 0, i16 0) // CHECK-NOT: call void @__kmpc_for_static_init // CHECK-NOT: call void @__kmpc_for_static_fini // CHECK-NOT: call i32 @__kmpc_nvptx_simd_reduce_nowait( // CHECK-NOT: call void @__kmpc_nvptx_end_reduce_nowait( -// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 1) +// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) // CHECK: ret void diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp index f86b456b3b07..6051637d553b 100644 --- a/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp @@ -63,33 +63,33 @@ int bar(int n){ } // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l30( -// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 1, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 0, i16 0) // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, // CHECK: call void @__kmpc_for_static_fini( -// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 1) +// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) // CHECK: ret void // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l36( -// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 1, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 0, i16 0) // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, // CHECK: call void @__kmpc_for_static_fini( -// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 1) +// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) // CHECK: ret void // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l41( -// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 1, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 
%{{.+}}, i16 0, i16 0) // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, // CHECK: call void @__kmpc_for_static_fini( -// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 1) +// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) // CHECK: ret void // CHECK: define {{.*}}void {{@__omp_offloading_.+}}_l46({{.+}}, i{{32|64}} [[F_IN:%.+]]) // CHECK: store {{.+}} [[F_IN]], {{.+}}* {{.+}}, -// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 1, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 0, i16 0) // CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]], // CHECK: call void @__kmpc_for_static_fini( -// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 1) +// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) // CHECK: ret void #endif