From 25d3de8a0ac4d65f2ac9820ba62a372414780649 Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev@hotmail.com>
Date: Mon, 7 Jan 2019 15:45:09 +0000
Subject: [PATCH] [OPENMP][NVPTX]Reduce number of barriers in reductions.

After the fix for the syncthreads we don't need to generate extra
barriers for the parallel reductions.

llvm-svn: 350530
---
 clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp                 | 7 -------
 .../OpenMP/nvptx_target_parallel_reduction_codegen.cpp     | 4 ----
 clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp        | 1 -
 3 files changed, 12 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index 21911c96f36a..403aefbef9ae 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -3124,7 +3124,6 @@ static void emitReductionListCopy(
 ///     sync
 ///     if (I am the first warp)
 ///       Copy smem[thread_id] to my local D
-///     sync
 static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM,
                                               ArrayRef<const Expr *> Privates,
                                               QualType ReductionArrayTy,
@@ -3337,12 +3336,6 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM,
 
       CGF.EmitBlock(W0MergeBB);
 
-      // While warp 0 copies values from transfer medium, all other warps must
-      // wait.
-      // kmpc_barrier.
-      CGM.getOpenMPRuntime().emitBarrierCall(CGF, Loc, OMPD_unknown,
-                                             /*EmitChecks=*/false,
-                                             /*ForceSimpleCall=*/true);
       if (NumIters > 1) {
         Cnt = Bld.CreateNSWAdd(Cnt, llvm::ConstantInt::get(CGM.IntTy, /*V=*/1));
         CGF.EmitStoreOfScalar(Cnt, CntAddr, /*Volatile=*/false, C.IntTy);
diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp
index 4f06f33e6850..c23f275bd1a4 100644
--- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp
@@ -231,7 +231,6 @@ int bar(int n){
   // CHECK: br label {{%?}}[[READ_CONT]]
   //
   // CHECK: [[READ_CONT]]
-  // CHECK: call void @__kmpc_barrier(%struct.ident_t* @
   // CHECK: [[NEXT:%.+]] = add nsw i32 [[CNT]], 1
   // CHECK: store i32 [[NEXT]], i32* [[CNT_ADDR]],
   // CHECK: br label
@@ -468,7 +467,6 @@ int bar(int n){
   //
   // CHECK: [[READ_CONT]]
   // CHECK: call void @__kmpc_barrier(%struct.ident_t* @
-  // CHECK: call void @__kmpc_barrier(%struct.ident_t* @
   // CHECK: [[IS_WARP_MASTER:%.+]] = icmp eq i32 [[LANEID]], 0
   // CHECK: br i1 [[IS_WARP_MASTER]], label {{%?}}[[DO_COPY:.+]], label {{%?}}[[COPY_ELSE:.+]]
   //
@@ -507,7 +505,6 @@ int bar(int n){
   // CHECK: br label {{%?}}[[READ_CONT]]
   //
   // CHECK: [[READ_CONT]]
-  // CHECK: call void @__kmpc_barrier(%struct.ident_t* @
   // CHECK: ret
 
 
@@ -823,7 +820,6 @@ int bar(int n){
   // CHECK: br label {{%?}}[[READ_CONT]]
   //
   // CHECK: [[READ_CONT]]
-  // CHECK: call void @__kmpc_barrier(%struct.ident_t* @
   // CHECK: ret
 
 #endif
diff --git a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp
index b0246e2ee80e..0de25295a7a9 100644
--- a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp
@@ -473,7 +473,6 @@ int bar(int n){
   // CHECK: br label {{%?}}[[READ_CONT]]
   //
   // CHECK: [[READ_CONT]]
-  // CHECK: call void @__kmpc_barrier(%struct.ident_t* @
   // CHECK: ret
 
 #endif