[OPENMP] Treat local variables in CUDA mode as thread local.

In CUDA mode all local variables are actually thread local|threadprivate, not private, and, thus, they cannot be shared between threads|lanes. llvm-svn: 326590
2018-03-02 17:17:12 +00:00 · 2018-03-02 17:17:12 +00:00 · 852525de25
parent b9699c009d
commit 852525de25
3 changed files with 168 additions and 16 deletions
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@ -1427,7 +1427,7 @@ def fopenmp_simd : Flag<["-"], "fopenmp-simd">, Group<f_Group>, Flags<[CC1Option
  HelpText<"Emit OpenMP code only for SIMD-based constructs.">;
 def fno_openmp_simd : Flag<["-"], "fno-openmp-simd">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>;
 def fopenmp_cuda_mode : Flag<["-"], "fopenmp-cuda-mode">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>;
-def fno_openmp_cuda_mode : Flag<["-"], "fno-openmp-cuda-mode">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>;
+def fno_openmp_cuda_mode : Flag<["-"], "fno-openmp-cuda-mode">, Group<f_Group>, Flags<[NoArgumentUnused]>;
 def fno_optimize_sibling_calls : Flag<["-"], "fno-optimize-sibling-calls">, Group<f_Group>;
 def foptimize_sibling_calls : Flag<["-"], "foptimize-sibling-calls">, Group<f_Group>;
 def fno_escaping_block_tail_calls : Flag<["-"], "fno-escaping-block-tail-calls">, Group<f_Group>, Flags<[CC1Option]>;
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@ -936,10 +936,11 @@ DSAStackTy::getTopMostTaskgroupReductionData(ValueDecl *D, SourceRange &SR,

 bool DSAStackTy::isOpenMPLocal(VarDecl *D, StackTy::reverse_iterator Iter) {
  D = D->getCanonicalDecl();
-  if (!isStackEmpty() && Stack.back().first.size() > 1) {
+  if (!isStackEmpty()) {
    reverse_iterator I = Iter, E = Stack.back().first.rend();
    Scope *TopScope = nullptr;
-    while (I != E && !isParallelOrTaskRegion(I->Directive))
+    while (I != E && !isParallelOrTaskRegion(I->Directive) &&
+           !isOpenMPTargetExecutionDirective(I->Directive))
      ++I;
    if (I == E)
      return false;
@ -956,20 +957,7 @@ DSAStackTy::DSAVarData DSAStackTy::getTopDSA(ValueDecl *D, bool FromParent) {
  D = getCanonicalDecl(D);
  DSAVarData DVar;

-  // OpenMP [2.9.1.1, Data-sharing Attribute Rules for Variables Referenced
-  // in a Construct, C/C++, predetermined, p.1]
-  //  Variables appearing in threadprivate directives are threadprivate.
  auto *VD = dyn_cast<VarDecl>(D);
-  if ((VD && VD->getTLSKind() != VarDecl::TLS_None &&
-       !(VD->hasAttr<OMPThreadPrivateDeclAttr>() &&
-         SemaRef.getLangOpts().OpenMPUseTLS &&
-         SemaRef.getASTContext().getTargetInfo().isTLSSupported())) ||
-      (VD && VD->getStorageClass() == SC_Register &&
-       VD->hasAttr<AsmLabelAttr>() && !VD->isLocalVarDecl())) {
-    addDSA(D, buildDeclRefExpr(SemaRef, VD, D->getType().getNonReferenceType(),
-                               D->getLocation()),
-           OMPC_threadprivate);
-  }
  auto TI = Threadprivates.find(D);
  if (TI != Threadprivates.end()) {
    DVar.RefExpr = TI->getSecond().RefExpr.getPointer();
@ -981,6 +969,62 @@ DSAStackTy::DSAVarData DSAStackTy::getTopDSA(ValueDecl *D, bool FromParent) {
        VD->getAttr<OMPThreadPrivateDeclAttr>()->getLocation());
    DVar.CKind = OMPC_threadprivate;
    addDSA(D, DVar.RefExpr, OMPC_threadprivate);
+    return DVar;
+  }
+  // OpenMP [2.9.1.1, Data-sharing Attribute Rules for Variables Referenced
+  // in a Construct, C/C++, predetermined, p.1]
+  //  Variables appearing in threadprivate directives are threadprivate.
+  if ((VD && VD->getTLSKind() != VarDecl::TLS_None &&
+       !(VD->hasAttr<OMPThreadPrivateDeclAttr>() &&
+         SemaRef.getLangOpts().OpenMPUseTLS &&
+         SemaRef.getASTContext().getTargetInfo().isTLSSupported())) ||
+      (VD && VD->getStorageClass() == SC_Register &&
+       VD->hasAttr<AsmLabelAttr>() && !VD->isLocalVarDecl())) {
+    DVar.RefExpr = buildDeclRefExpr(
+        SemaRef, VD, D->getType().getNonReferenceType(), D->getLocation());
+    DVar.CKind = OMPC_threadprivate;
+    addDSA(D, DVar.RefExpr, OMPC_threadprivate);
+    return DVar;
+  }
+  if (SemaRef.getLangOpts().OpenMPCUDAMode && VD &&
+      VD->isLocalVarDeclOrParm() && !isStackEmpty() &&
+      !isLoopControlVariable(D).first) {
+    auto IterTarget =
+        std::find_if(Stack.back().first.rbegin(), Stack.back().first.rend(),
+                     [](const SharingMapTy &Data) {
+                       return isOpenMPTargetExecutionDirective(Data.Directive);
+                     });
+    if (IterTarget != Stack.back().first.rend()) {
+      auto ParentIterTarget = std::next(IterTarget, 1);
+      auto Iter = Stack.back().first.rbegin();
+      while (Iter != ParentIterTarget) {
+        if (isOpenMPLocal(VD, Iter)) {
+          DVar.RefExpr =
+              buildDeclRefExpr(SemaRef, VD, D->getType().getNonReferenceType(),
+                               D->getLocation());
+          DVar.CKind = OMPC_threadprivate;
+          return DVar;
+        }
+        std::advance(Iter, 1);
+      }
+      if (!isClauseParsingMode() || IterTarget != Stack.back().first.rbegin()) {
+        auto DSAIter = IterTarget->SharingMap.find(D);
+        if (DSAIter != IterTarget->SharingMap.end() &&
+            isOpenMPPrivate(DSAIter->getSecond().Attributes)) {
+          DVar.RefExpr = DSAIter->getSecond().RefExpr.getPointer();
+          DVar.CKind = OMPC_threadprivate;
+          return DVar;
+        } else if (!SemaRef.IsOpenMPCapturedByRef(
+                       D, std::distance(ParentIterTarget,
+                                        Stack.back().first.rend()))) {
+          DVar.RefExpr =
+              buildDeclRefExpr(SemaRef, VD, D->getType().getNonReferenceType(),
+                               IterTarget->ConstructLoc);
+          DVar.CKind = OMPC_threadprivate;
+          return DVar;
+        }
+      }
+    }
  }

  if (isStackEmpty())
--- a/clang/test/OpenMP/nvptx_target_cuda_mode_messages.cpp
+++ b/clang/test/OpenMP/nvptx_target_cuda_mode_messages.cpp
@ -0,0 +1,108 @@
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-cuda-mode -fopenmp-host-ir-file-path %t-ppc-host.bc -o -
+
+template <typename tx, typename ty>
+struct TT {
+  tx X;
+  ty Y;
+};
+
+int foo(int n, double *ptr) {
+  int a = 0;
+  short aa = 0;
+  float b[10];
+  double c[5][10];
+  TT<long long, char> d;
+
+#pragma omp target firstprivate(a) map(tofrom: b) // expected-note 2 {{defined as threadprivate or thread local}}
+  {
+    int c;                               // expected-note {{defined as threadprivate or thread local}}
+#pragma omp parallel shared(a, b, c, aa) // expected-error 3 {{threadprivate or thread local variable cannot be shared}}
+    b[a] = a;
+#pragma omp parallel for
+    for (int i = 0; i < 10; ++i) // expected-note {{defined as threadprivate or thread local}}
+#pragma omp parallel shared(i) // expected-error {{threadprivate or thread local variable cannot be shared}}
+    ++i;
+  }
+
+#pragma omp target map(aa, b, c, d)
+  {
+    int e;                         // expected-note {{defined as threadprivate or thread local}}
+#pragma omp parallel private(b, e) // expected-error {{threadprivate or thread local variable cannot be private}}
+    {
+      aa += 1;
+      b[2] = 1.0;
+      c[1][2] = 1.0;
+      d.X = 1;
+      d.Y = 1;
+    }
+  }
+
+#pragma omp target private(ptr)
+  {
+    ptr[0]++;
+  }
+
+  return a;
+}
+
+template <typename tx>
+tx ftemplate(int n) {
+  tx a = 0;
+  tx b[10];
+
+#pragma omp target reduction(+ \
+                             : a, b) // expected-note {{defined as threadprivate or thread local}}
+  {
+    int e;                        // expected-note {{defined as threadprivate or thread local}}
+#pragma omp parallel shared(a, e) // expected-error 2 {{threadprivate or thread local variable cannot be shared}}
+    a += 1;
+    b[2] += 1;
+  }
+
+  return a;
+}
+
+static int fstatic(int n) {
+  int a = 0;
+  char aaa = 0;
+  int b[10];
+
+#pragma omp target firstprivate(a, aaa, b)
+  {
+    a += 1;
+    aaa += 1;
+    b[2] += 1;
+  }
+
+  return a;
+}
+
+struct S1 {
+  double a;
+
+  int r1(int n) {
+    int b = n + 1;
+
+#pragma omp target firstprivate(b) // expected-note {{defined as threadprivate or thread local}}
+    {
+      int c;                      // expected-note {{defined as threadprivate or thread local}}
+#pragma omp parallel shared(b, c) // expected-error 2 {{threadprivate or thread local variable cannot be shared}}
+      this->a = (double)b + 1.5;
+    }
+
+    return (int)b;
+  }
+};
+
+int bar(int n, double *ptr) {
+  int a = 0;
+  a += foo(n, ptr);
+  S1 S;
+  a += S.r1(n);
+  a += fstatic(n);
+  a += ftemplate<int>(n); // expected-note {{in instantiation of function template specialization 'ftemplate<int>' requested here}}
+
+  return a;
+}
+