[OPENMP] Support `reduction` clause on target-based directives.

OpenMP 5.0 added support for `reduction` clause in target-based directives. Patch adds this support to clang. llvm-svn: 320596
2017-12-13 17:31:39 +00:00 · 2017-12-13 17:31:39 +00:00 · 3f96fe6d44
parent dde93259a3
commit 3f96fe6d44
6 changed files with 532 additions and 55 deletions
--- a/clang/include/clang/Basic/OpenMPKinds.def
+++ b/clang/include/clang/Basic/OpenMPKinds.def
@ -454,6 +454,7 @@ OPENMP_TARGET_CLAUSE(depend)
 OPENMP_TARGET_CLAUSE(defaultmap)
 OPENMP_TARGET_CLAUSE(firstprivate)
 OPENMP_TARGET_CLAUSE(is_device_ptr)
+OPENMP_TARGET_CLAUSE(reduction)

 // Clauses allowed for OpenMP directive 'target data'.
 OPENMP_TARGET_DATA_CLAUSE(if)
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@ -6037,6 +6037,8 @@ private:

  /// \brief Set of all first private variables in the current directive.
  llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls;
+  /// Set of all reduction variables in the current directive.
+  llvm::SmallPtrSet<const VarDecl *, 8> ReductionDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
@ -6429,6 +6431,12 @@ private:
    if (FirstPrivateDecls.count(Cap.getCapturedVar()))
      return MappableExprsHandler::OMP_MAP_PRIVATE |
             MappableExprsHandler::OMP_MAP_TO;
+    // Reduction variable  will use only the 'private ptr' and 'map to_from'
+    // flag.
+    if (ReductionDecls.count(Cap.getCapturedVar())) {
+      return MappableExprsHandler::OMP_MAP_TO |
+             MappableExprsHandler::OMP_MAP_FROM;
+    }

    // We didn't modify anything.
    return CurrentModifiers;
@ -6442,6 +6450,12 @@ public:
      for (const auto *D : C->varlists())
        FirstPrivateDecls.insert(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
+    for (const auto *C : Dir.getClausesOfKind<OMPReductionClause>()) {
+      for (const auto *D : C->varlists()) {
+        ReductionDecls.insert(
+            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
+      }
+    }
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
@ -6721,15 +6735,9 @@ public:
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
-      CurMapTypes.push_back(ElementType->isAggregateType()
-                                ? (OMP_MAP_TO | OMP_MAP_FROM)
-                                : OMP_MAP_TO);
-
-      // If we have a capture by reference we may need to add the private
-      // pointer flag if the base declaration shows in some first-private
-      // clause.
-      CurMapTypes.back() =
-          adjustMapModifiersForPrivateClauses(CI, CurMapTypes.back());
+      CurMapTypes.emplace_back(adjustMapModifiersForPrivateClauses(
+          CI, ElementType->isAggregateType() ? (OMP_MAP_TO | OMP_MAP_FROM)
+                                             : OMP_MAP_TO));
    }
    // Every default map produces a single argument which is a target parameter.
    CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@ -1279,9 +1279,13 @@ bool Sema::IsOpenMPCapturedByRef(ValueDecl *D, unsigned Level) {
      // reference except if it is a pointer that is dereferenced somehow.
      IsByRef = !(Ty->isPointerType() && IsVariableAssociatedWithSection);
    } else {
-      // By default, all the data that has a scalar type is mapped by copy.
-      IsByRef = !Ty->isScalarType() ||
-                DSAStack->getDefaultDMAAtLevel(Level) == DMA_tofrom_scalar;
+      // By default, all the data that has a scalar type is mapped by copy
+      // (except for reduction variables).
+      IsByRef =
+          !Ty->isScalarType() ||
+          DSAStack->getDefaultDMAAtLevel(Level) == DMA_tofrom_scalar ||
+          DSAStack->hasExplicitDSA(
+              D, [](OpenMPClauseKind K) { return K == OMPC_reduction; }, Level);
    }
  }

--- a/clang/test/OpenMP/target_reduction_codegen.cpp
+++ b/clang/test/OpenMP/target_reduction_codegen.cpp
@ -0,0 +1,215 @@
+// Only test codegen on target side, as private clause does not require any action on the host side
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+template<typename tx, typename ty>
+struct TT{
+  tx X;
+  ty Y;
+  TT<tx, ty> operator*(const TT<tx, ty> &) { return *this; }
+};
+
+// TCHECK: [[S1:%.+]] = type { double }
+
+int foo(int n) {
+  int a = 0;
+  short aa = 0;
+  float b[10];
+  float bn[n];
+  double c[5][10];
+  double cn[5][n];
+  TT<long long, char> d;
+
+  #pragma omp target reduction(*:a)
+  {
+  }
+
+  // TCHECK: define void @__omp_offloading_{{.+}}(i32*{{.+}} %{{.+}})
+  // TCHECK: [[A:%.+]] = alloca i{{[0-9]+}}*,
+  // TCHECK: store {{.+}}, {{.+}} [[A]],
+  // TCHECK: load i32*, i32** [[A]],
+  // TCHECK: ret void
+
+#pragma omp target reduction(+:a)
+  {
+    a = 1;
+  }
+
+  // TCHECK:  define void @__omp_offloading_{{.+}}(i32*{{.+}} %{{.+}})
+  // TCHECK: [[A:%.+]] = alloca i{{[0-9]+}}*,
+  // TCHECK: store {{.+}}, {{.+}} [[A]],
+  // TCHECK: [[REF:%.+]] = load i32*, i32** [[A]],
+  // TCHECK: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[REF]],
+  // TCHECK: ret void
+
+  #pragma omp target reduction(-:a, aa)
+  {
+    a = 1;
+    aa = 1;
+  }
+
+  // TCHECK:  define void @__omp_offloading_{{.+}}(i32*{{.+}} [[A:%.+]], i16*{{.+}} [[AA:%.+]])
+  // TCHECK:  [[A:%.+]] = alloca i{{[0-9]+}}*,
+  // TCHECK:  [[AA:%.+]] = alloca i{{[0-9]+}}*,
+  // TCHECK: store {{.+}}, {{.+}} [[A]],
+  // TCHECK: store {{.+}}, {{.+}} [[AA]],
+  // TCHECK: [[A_REF:%.+]] = load i32*, i32** [[A]],
+  // TCHECK: [[AA_REF:%.+]] = load i16*, i16** [[AA]],
+  // TCHECK:  store i{{[0-9]+}} 1, i{{[0-9]+}}* [[A_REF]],
+  // TCHECK:  store i{{[0-9]+}} 1, i{{[0-9]+}}* [[AA_REF]],
+  // TCHECK:  ret void
+
+  return a;
+}
+
+
+template<typename tx>
+tx ftemplate(int n) {
+  tx a = 0;
+  short aa = 0;
+  tx b[10];
+
+#pragma omp target reduction(+:a,aa,b)
+  {
+    a = 1;
+    aa = 1;
+    b[2] = 1;
+  }
+
+  return a;
+}
+
+static
+int fstatic(int n) {
+  int a = 0;
+  short aa = 0;
+  char aaa = 0;
+  int b[10];
+
+#pragma omp target reduction(-:a,aa,aaa,b)
+  {
+    a = 1;
+    aa = 1;
+    aaa = 1;
+    b[2] = 1;
+  }
+
+  return a;
+}
+
+// TCHECK: define void @__omp_offloading_{{.+}}(i32*{{.+}}, i16*{{.+}}, i8*{{.+}}, [10 x i32]*{{.+}})
+// TCHECK:  [[A:%.+]] = alloca i{{[0-9]+}}*,
+// TCHECK:  [[A2:%.+]] = alloca i{{[0-9]+}}*,
+// TCHECK:  [[A3:%.+]] = alloca i{{[0-9]+}}*,
+// TCHECK:  [[B:%.+]] = alloca [10 x i{{[0-9]+}}]*,
+// TCHECK: store {{.+}}, {{.+}} [[A]],
+// TCHECK: store {{.+}}, {{.+}} [[A2]],
+// TCHECK: store {{.+}}, {{.+}} [[A3]],
+// TCHECK: store {{.+}}, {{.+}} [[B]],
+// TCHECK: [[A_REF:%.+]] = load i32*, i32** [[A]],
+// TCHECK: [[AA_REF:%.+]] = load i16*, i16** [[AA]],
+// TCHECK: [[A3_REF:%.+]] = load i8*, i8** [[A3]],
+// TCHECK: [[B_REF:%.+]] = load {{.+}}*, {{.+}}** [[B]],
+// TCHECK:  store i{{[0-9]+}} 1, i{{[0-9]+}}* [[A_REF]],
+// TCHECK:  store i{{[0-9]+}} 1, i{{[0-9]+}}* [[AA_REF]],
+// TCHECK:  store i{{[0-9]+}} 1, i{{[0-9]+}}* [[A3_REF]],
+// TCHECK:  [[B_GEP:%.+]] = getelementptr inbounds [10 x i{{[0-9]+}}], [10 x i{{[0-9]+}}]* [[B_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+// TCHECK:  store i{{[0-9]+}} 1, i{{[0-9]+}}* [[B_GEP]],
+// TCHECK:  ret void
+
+struct S1 {
+  double a;
+
+  int r1(int n){
+    int b = n+1;
+    short int c[2][n];
+
+#pragma omp target reduction(max:b,c)
+    {
+      this->a = (double)b + 1.5;
+      c[1][1] = ++a;
+    }
+
+    return c[1][1] + (int)b;
+  }
+
+  // TCHECK: define void @__omp_offloading_{{.+}}([[S1]]* [[TH:%.+]], i32*{{.+}}, i{{[0-9]+}} [[VLA:%.+]], i{{[0-9]+}} [[VLA1:%.+]], i16*{{.+}})
+  // TCHECK: [[TH_ADDR:%.+]] = alloca [[S1]]*,
+  // TCHECK: [[B_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+  // TCHECK: [[VLA_ADDR:%.+]] = alloca i{{[0-9]+}},
+  // TCHECK: [[VLA_ADDR2:%.+]] = alloca i{{[0-9]+}},
+  // TCHECK: [[C_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+  // TCHECK: store [[S1]]* [[TH]], [[S1]]** [[TH_ADDR]],
+  // TCHECK: store i{{[0-9]+}}* {{.+}}, i{{[0-9]+}}** [[B_ADDR]],
+  // TCHECK: store i{{[0-9]+}} [[VLA]], i{{[0-9]+}}* [[VLA_ADDR]],
+  // TCHECK: store i{{[0-9]+}} [[VLA1]], i{{[0-9]+}}* [[VLA_ADDR2]],
+  // TCHECK: store i{{[0-9]+}}* {{.+}}, i{{[0-9]+}}** [[C_ADDR]],
+  // TCHECK: [[TH_ADDR_REF:%.+]] = load [[S1]]*, [[S1]]** [[TH_ADDR]],
+  // TCHECK: [[B_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[B_ADDR]],
+  // TCHECK: [[VLA_ADDR_REF:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[VLA_ADDR]],
+  // TCHECK: [[VLA_ADDR_REF2:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[VLA_ADDR2]],
+  // TCHECK: [[C_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[C_ADDR]],
+
+  // this->a = (double)b + 1.5;
+  // TCHECK: [[B_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[B_REF]],
+  // TCHECK: [[B_CONV:%.+]] = sitofp i{{[0-9]+}} [[B_VAL]] to double
+  // TCHECK: [[NEW_A_VAL:%.+]] = fadd double [[B_CONV]], 1.5{{.+}}+00
+  // TCHECK: [[A_FIELD:%.+]] = getelementptr inbounds [[S1]], [[S1]]* [[TH_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // TCHECK: store double [[NEW_A_VAL]], double* [[A_FIELD]],
+
+  // c[1][1] = ++a;
+  // TCHECK: [[A_FIELD4:%.+]] = getelementptr inbounds [[S1]], [[S1]]* [[TH_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // TCHECK: [[A_FIELD4_VAL:%.+]] = load double, double* [[A_FIELD4]],
+  // TCHECK: [[A_FIELD_INC:%.+]] = fadd double [[A_FIELD4_VAL]], 1.0{{.+}}+00
+  // TCHECK: store double [[A_FIELD_INC]], double* [[A_FIELD4]],
+  // TCHECK: [[A_FIELD_INC_CONV:%.+]] = fptosi double [[A_FIELD_INC]] to i{{[0-9]+}}
+  // TCHECK: [[C_IND:%.+]] = mul{{.+}} i{{[0-9]+}} 1, [[VLA_ADDR_REF2]]
+  // TCHECK: [[C_1_REF:%.+]] = getelementptr inbounds i{{[0-9]+}}, i{{[0-9]+}}* [[C_REF]], i{{[0-9]+}} [[C_IND]]
+  // TCHECK: [[C_1_1_REF:%.+]] = getelementptr inbounds i{{[0-9]+}}, i{{[0-9]+}}* [[C_1_REF]], i{{[0-9]+}} 1
+  // TCHECK: store i{{[0-9]+}} [[A_FIELD_INC_CONV]], i{{[0-9]+}}* [[C_1_1_REF]],
+
+  // finish
+  // TCHECK: ret void
+};
+
+
+int bar(int n){
+  int a = 0;
+  a += foo(n);
+  S1 S;
+  a += S.r1(n);
+  a += fstatic(n);
+  a += ftemplate<int>(n);
+
+  return a;
+}
+
+// template
+// TCHECK: define void @__omp_offloading_{{.+}}(i{{[0-9]+}}*{{.+}}, i{{[0-9]+}}*{{.+}}, [10 x i32]*{{.+}})
+// TCHECK: [[A:%.+]] = alloca i{{[0-9]+}}*,
+// TCHECK: [[A2:%.+]] = alloca i{{[0-9]+}}*,
+// TCHECK: [[B:%.+]] = alloca [10 x i{{[0-9]+}}]*,
+// TCHECK: store {{.+}}, {{.+}} [[A]],
+// TCHECK: store {{.+}}, {{.+}} [[A2]],
+// TCHECK: store {{.+}}, {{.+}} [[B]],
+// TCHECK: [[A_REF:%.+]] = load i32*, i32** [[A]],
+// TCHECK: [[AA_REF:%.+]] = load i16*, i16** [[AA]],
+// TCHECK: [[B_REF:%.+]] = load {{.+}}*, {{.+}}** [[B]],
+// TCHECK: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[A_REF]],
+// TCHECK: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[AA_REF]],
+// TCHECK: [[B_GEP:%.+]] = getelementptr inbounds [10 x i{{[0-9]+}}], [10 x i{{[0-9]+}}]* [[B_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+// TCHECK: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[B_GEP]],
+// TCHECK: ret void
+
+#endif
--- a/clang/test/OpenMP/target_reduction_messages.cpp
+++ b/clang/test/OpenMP/target_reduction_messages.cpp
@ -0,0 +1,262 @@
+// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 150 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s
+
+void foo() {
+}
+
+bool foobool(int argc) {
+  return argc;
+}
+
+void foobar(int &ref) {
+#pragma omp target reduction(+:ref)
+  foo();
+}
+
+struct S1; // expected-note {{declared here}} expected-note 4 {{forward declaration of 'S1'}}
+extern S1 a;
+class S2 {
+  mutable int a;
+  S2 &operator+(const S2 &arg) { return (*this); } // expected-note 3 {{implicitly declared private here}}
+
+public:
+  S2() : a(0) {}
+  S2(S2 &s2) : a(s2.a) {}
+  static float S2s; // expected-note 2 {{static data member is predetermined as shared}}
+  static const float S2sc; // expected-note 2 {{'S2sc' declared here}}
+};
+const float S2::S2sc = 0;
+S2 b;                     // expected-note 3 {{'b' defined here}}
+const S2 ba[5];           // expected-note 2 {{'ba' defined here}}
+class S3 {
+  int a;
+
+public:
+  int b;
+  S3() : a(0) {}
+  S3(const S3 &s3) : a(s3.a) {}
+  S3 operator+(const S3 &arg1) { return arg1; }
+};
+int operator+(const S3 &arg1, const S3 &arg2) { return 5; }
+S3 c;               // expected-note 3 {{'c' defined here}}
+const S3 ca[5];     // expected-note 2 {{'ca' defined here}}
+extern const int f; // expected-note 4 {{'f' declared here}}
+class S4 {
+  int a;
+  S4(); // expected-note {{implicitly declared private here}}
+  S4(const S4 &s4);
+  S4 &operator+(const S4 &arg) { return (*this); }
+
+public:
+  S4(int v) : a(v) {}
+};
+S4 &operator&=(S4 &arg1, S4 &arg2) { return arg1; }
+class S5 {
+  int a;
+  S5() : a(0) {} // expected-note {{implicitly declared private here}}
+  S5(const S5 &s5) : a(s5.a) {}
+  S5 &operator+(const S5 &arg);
+
+public:
+  S5(int v) : a(v) {}
+};
+class S6 { // expected-note 3 {{candidate function (the implicit copy assignment operator) not viable: no known conversion from 'int' to 'const S6' for 1st argument}}
+#if __cplusplus >= 201103L // C++11 or later
+// expected-note@-2 3 {{candidate function (the implicit move assignment operator) not viable}}
+#endif
+  int a;
+
+public:
+  S6() : a(6) {}
+  operator int() { return 6; }
+} o;
+
+S3 h, k;
+#pragma omp threadprivate(h) // expected-note 2 {{defined as threadprivate or thread local}}
+
+template <class T>       // expected-note {{declared here}}
+T tmain(T argc) {
+  const T d = T();       // expected-note 4 {{'d' defined here}}
+  const T da[5] = {T()}; // expected-note 2 {{'da' defined here}}
+  T qa[5] = {T()};
+  T i;
+  T &j = i;                    // expected-note 4 {{'j' defined here}}
+  S3 &p = k;                   // expected-note 2 {{'p' defined here}}
+  const T &r = da[(int)i];     // expected-note 2 {{'r' defined here}}
+  T &q = qa[(int)i];           // expected-note 2 {{'q' defined here}}
+  T fl;
+#pragma omp target reduction // expected-error {{expected '(' after 'reduction'}}
+  foo();
+#pragma omp target reduction + // expected-error {{expected '(' after 'reduction'}} expected-warning {{extra tokens at the end of '#pragma omp target' are ignored}}
+  foo();
+#pragma omp target reduction( // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  foo();
+#pragma omp target reduction(- // expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  foo();
+#pragma omp target reduction() // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}}
+  foo();
+#pragma omp target reduction(*) // expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected expression}}
+  foo();
+#pragma omp target reduction(\) // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}}
+  foo();
+#pragma omp target reduction(& : argc // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error {{invalid operands to binary expression ('float' and 'float')}}
+  foo();
+#pragma omp target reduction(| : argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error {{invalid operands to binary expression ('float' and 'float')}}
+  foo();
+#pragma omp target reduction(|| : argc ? i : argc) // expected-error 2 {{expected variable name, array element or array section}}
+  foo();
+#pragma omp target reduction(foo : argc) //expected-error {{incorrect reduction identifier, expected one of '+', '-', '*', '&', '|', '^', '&&', '||', 'min' or 'max' or declare reduction for type 'float'}} expected-error {{incorrect reduction identifier, expected one of '+', '-', '*', '&', '|', '^', '&&', '||', 'min' or 'max' or declare reduction for type 'int'}}
+  foo();
+#pragma omp target reduction(&& : argc)
+  foo();
+#pragma omp target reduction(^ : T) // expected-error {{'T' does not refer to a value}}
+  foo();
+#pragma omp target reduction(+ : a, b, c, d, f) // expected-error {{a reduction list item with incomplete type 'S1'}} expected-error 3 {{const-qualified list item cannot be reduction}} expected-error 2 {{'operator+' is a private member of 'S2'}}
+  foo();
+#pragma omp target reduction(min : a, b, c, d, f) // expected-error {{a reduction list item with incomplete type 'S1'}} expected-error 4 {{arguments of OpenMP clause 'reduction' for 'min' or 'max' must be of arithmetic type}} expected-error 3 {{const-qualified list item cannot be reduction}}
+  foo();
+#pragma omp target reduction(max : h.b) // expected-error {{expected variable name, array element or array section}}
+  foo();
+#pragma omp target reduction(+ : ba) // expected-error {{const-qualified list item cannot be reduction}}
+  foo();
+#pragma omp target reduction(* : ca) // expected-error {{const-qualified list item cannot be reduction}}
+  foo();
+#pragma omp target reduction(- : da) // expected-error {{const-qualified list item cannot be reduction}} expected-error {{const-qualified list item cannot be reduction}}
+  foo();
+#pragma omp target reduction(^ : fl) // expected-error {{invalid operands to binary expression ('float' and 'float')}}
+  foo();
+#pragma omp target reduction(&& : S2::S2s) // expected-error {{shared variable cannot be reduction}}
+  foo();
+#pragma omp target reduction(&& : S2::S2sc) // expected-error {{const-qualified list item cannot be reduction}}
+  foo();
+#pragma omp target reduction(+ : h, k) // expected-error {{threadprivate or thread local variable cannot be reduction}}
+  foo();
+#pragma omp target reduction(+ : o) // expected-error 2 {{no viable overloaded '='}}
+  foo();
+#pragma omp parallel private(i), reduction(+ : j), reduction(+ : q) // expected-error 4 {{argument of OpenMP clause 'reduction' must reference the same object in all threads}}
+  foo();
+#pragma omp parallel private(k)
+#pragma omp target reduction(+ : p), reduction(+ : p) // expected-error 2 {{argument of OpenMP clause 'reduction' must reference the same object in all threads}}
+  foo();
+#pragma omp target reduction(+ : p), reduction(+ : p) // expected-error 2 {{variable can appear only once in OpenMP 'reduction' clause}} expected-note 2 {{previously referenced here}}
+  foo();
+#pragma omp target reduction(+ : r) // expected-error 2 {{const-qualified list item cannot be reduction}}
+  foo();
+#pragma omp parallel shared(i)
+#pragma omp target reduction(min : i)
+#pragma omp parallel reduction(max : j) // expected-error 2 {{argument of OpenMP clause 'reduction' must reference the same object in all threads}}
+  foo();
+#pragma omp parallel
+#pragma omp for private(fl)
+  for (int i = 0; i < 10; ++i)
+#pragma omp target reduction(+ : fl)
+    foo();
+#pragma omp parallel
+#pragma omp for reduction(- : fl)
+  for (int i = 0; i < 10; ++i)
+#pragma omp target reduction(+ : fl)
+    foo();
+
+  return T();
+}
+
+namespace A {
+double x;
+#pragma omp threadprivate(x) // expected-note {{defined as threadprivate or thread local}}
+}
+namespace B {
+using A::x;
+}
+
+int main(int argc, char **argv) {
+  const int d = 5;       // expected-note 2 {{'d' defined here}}
+  const int da[5] = {0}; // expected-note {{'da' defined here}}
+  int qa[5] = {0};
+  S4 e(4);
+  S5 g(5);
+  int i;
+  int &j = i;                  // expected-note 2 {{'j' defined here}}
+  S3 &p = k;                   // expected-note 2 {{'p' defined here}}
+  const int &r = da[i];        // expected-note {{'r' defined here}}
+  int &q = qa[i];              // expected-note {{'q' defined here}}
+  float fl;
+#pragma omp target reduction // expected-error {{expected '(' after 'reduction'}}
+  foo();
+#pragma omp target reduction + // expected-error {{expected '(' after 'reduction'}} expected-warning {{extra tokens at the end of '#pragma omp target' are ignored}}
+  foo();
+#pragma omp target reduction( // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  foo();
+#pragma omp target reduction(- // expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  foo();
+#pragma omp target reduction() // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}}
+  foo();
+#pragma omp target reduction(*) // expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected expression}}
+  foo();
+#pragma omp target reduction(\) // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}}
+  foo();
+#pragma omp target reduction(foo : argc // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error {{incorrect reduction identifier, expected one of '+', '-', '*', '&', '|', '^', '&&', '||', 'min' or 'max'}}
+  foo();
+#pragma omp target reduction(| : argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  foo();
+#pragma omp target reduction(|| : argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name, array element or array section}}
+  foo();
+#pragma omp target reduction(~ : argc) // expected-error {{expected unqualified-id}}
+  foo();
+#pragma omp target reduction(&& : argc)
+  foo();
+#pragma omp target reduction(^ : S1) // expected-error {{'S1' does not refer to a value}}
+  foo();
+#pragma omp target reduction(+ : a, b, c, d, f) // expected-error {{a reduction list item with incomplete type 'S1'}} expected-error 2 {{const-qualified list item cannot be reduction}} expected-error {{'operator+' is a private member of 'S2'}}
+  foo();
+#pragma omp target reduction(min : a, b, c, d, f) // expected-error {{a reduction list item with incomplete type 'S1'}} expected-error 2 {{arguments of OpenMP clause 'reduction' for 'min' or 'max' must be of arithmetic type}} expected-error 2 {{const-qualified list item cannot be reduction}}
+  foo();
+#pragma omp target reduction(max : h.b) // expected-error {{expected variable name, array element or array section}}
+  foo();
+#pragma omp target reduction(+ : ba) // expected-error {{const-qualified list item cannot be reduction}}
+  foo();
+#pragma omp target reduction(* : ca) // expected-error {{const-qualified list item cannot be reduction}}
+  foo();
+#pragma omp target reduction(- : da) // expected-error {{const-qualified list item cannot be reduction}}
+  foo();
+#pragma omp target reduction(^ : fl) // expected-error {{invalid operands to binary expression ('float' and 'float')}}
+  foo();
+#pragma omp target reduction(&& : S2::S2s) // expected-error {{shared variable cannot be reduction}}
+  foo();
+#pragma omp target reduction(&& : S2::S2sc) // expected-error {{const-qualified list item cannot be reduction}}
+  foo();
+#pragma omp target reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{nvalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
+  foo();
+#pragma omp target reduction(+ : h, k, B::x) // expected-error 2 {{threadprivate or thread local variable cannot be reduction}}
+  foo();
+#pragma omp target reduction(+ : o) // expected-error {{no viable overloaded '='}}
+  foo();
+#pragma omp parallel private(i), reduction(+ : j), reduction(+ : q) // expected-error 2 {{argument of OpenMP clause 'reduction' must reference the same object in all threads}}
+  foo();
+#pragma omp parallel private(k)
+#pragma omp target reduction(+ : p), reduction(+ : p) // expected-error 2 {{argument of OpenMP clause 'reduction' must reference the same object in all threads}}
+  foo();
+#pragma omp target reduction(+ : p), reduction(+ : p) // expected-error {{variable can appear only once in OpenMP 'reduction' clause}} expected-note {{previously referenced here}}
+  foo();
+#pragma omp target reduction(+ : r) // expected-error {{const-qualified list item cannot be reduction}}
+  foo();
+#pragma omp parallel shared(i)
+#pragma omp target reduction(min : i)
+#pragma omp parallel reduction(max : j) // expected-error {{argument of OpenMP clause 'reduction' must reference the same object in all threads}}
+  foo();
+#pragma omp parallel
+#pragma omp for private(fl)
+  for (int i = 0; i < 10; ++i)
+#pragma omp target reduction(+ : fl)
+    foo();
+#pragma omp parallel
+#pragma omp for reduction(- : fl)
+  for (int i = 0; i < 10; ++i)
+#pragma omp target reduction(+ : fl)
+    foo();
+  static int m;
+#pragma omp target reduction(+ : m) // OK
+  m++;
+
+  return tmain(argc) + tmain(fl); // expected-note {{in instantiation of function template specialization 'tmain<int>' requested here}} expected-note {{in instantiation of function template specialization 'tmain<float>' requested here}}
+}
--- a/clang/test/OpenMP/target_teams_distribute_reduction_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_reduction_codegen.cpp
@ -38,24 +38,22 @@ int main() {
    // LAMBDA:  ret
 #pragma omp target teams distribute reduction(+: sivar)
  for (int i = 0; i < 2; ++i) {
-    // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i{{64|32}} [[SIVAR_ARG:%.+]])
-    // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}},
-    // LAMBDA: [[SIVAR_CASTED:%.+]] = alloca i{{.+}},
+    // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i32*{{.+}} [[SIVAR_ARG:%.+]])
+    // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*,
    // LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
-    // LAMBDA: [[SIVAR_CONV:%.+]] = bitcast{{.+}} [[SIVAR_ADDR]] to
-    // LAMBDA: [[SIVAR:%.+]] = load i64, i64* [[SIVAR_CASTED]],
+    // LAMBDA: [[SIVAR:%.+]] = load i32*, i32** [[SIVAR_ADDR]],
    // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[LOUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR]])
    // LAMBDA: ret void

-    // LAMBDA: define internal void @[[LOUTL1]]({{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]])
+    // LAMBDA: define internal void @[[LOUTL1]]({{.+}}, {{.+}}, {{.+}}*{{.+}} [[SIVAR_ARG:%.+]])
    // Skip global and bound tid vars
    // LAMBDA: {{.+}} = alloca i32*,
    // LAMBDA: {{.+}} = alloca i32*,
-    // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}},
+    // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*,
    // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{.+}},
    // LAMBDA: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
    // LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
-    // LAMBDA: [[SIVAR_REF:%.+]] = bitcast i64* [[SIVAR_ADDR]] to i32*
+    // LAMBDA: [[SIVAR_REF:%.+]] = load {{.+}}, {{.+}} [[SIVAR_ADDR]],
    // LAMBDA: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]],

    // LAMBDA: call void @__kmpc_for_static_init_4(
@ -114,28 +112,26 @@ int main() {

 // CHECK: define {{.*}}i{{[0-9]+}} @main()
 // CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
-// CHECK: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}})
-// CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
-// CHECK:  ret
+// CHECK: call void @[[OFFL1:.+]](i32* {{.+}})
+// CHECK: [[RES:%.+]] = call{{.*}} i32 @[[TMAIN_INT:[^(]+]]()
+// CHECK: ret i32 [[RES]]

-// CHECK: define{{.*}} void @[[OFFL1]](i{{64|32}} [[SIVAR_ARG:%.+]])
-// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}},
-// CHECK: [[SIVAR_CASTED:%.+]] = alloca i{{.+}},
-// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
-// CHECK-64: [[SIVAR_LOAD:%.+]] = load i64, i64* [[SIVAR_CASTED]],
-// CHECK-32: [[SIVAR_LOAD:%.+]] = load i32, i32* [[SIVAR_CASTED]],
+// CHECK: define{{.*}} void @[[OFFL1]](i32*{{.+}} [[SIVAR_ARG:%.+]])
+// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*,
+// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}}** [[SIVAR_ADDR]],
+// CHECK: [[SIVAR_LOAD:%.+]] = load i32*, i32** [[SIVAR_ADDR]],
 // CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR_LOAD]])
 // CHECK: ret void

-// CHECK: define internal void @[[OUTL1]]({{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]])
+// CHECK: define internal void @[[OUTL1]]({{.+}}, {{.+}}, i32*{{.+}} [[SIVAR_ARG:%.+]])
 // Skip global and bound tid vars
 // CHECK: {{.+}} = alloca i32*,
 // CHECK: {{.+}} = alloca i32*,
-// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}},
-// CHECK: [[SIVAR_PRIV:%.+]] = alloca i{{.+}},
+// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*,
+// CHECK: [[SIVAR_PRIV:%.+]] = alloca i32,
 // CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
 // CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
-// CHECK-64: [[SIVAR_REF:%.+]] = bitcast i64* [[SIVAR_ADDR]] to i32*
+// CHECK: [[SIVAR_REF:%.+]] = load i32*, i32** [[SIVAR_ADDR]],
 // CHECK: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]],

 // CHECK: call void @__kmpc_for_static_init_4(
@ -151,46 +147,40 @@ int main() {
 // CHECK: {{.+}}, label %[[CASE2:.+]]
 // CHECK: ]
 // CHECK: [[CASE1]]:
-// CHECK-64-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]],
-// CHECK-32-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]],
+// CHECK-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]],
 // CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
 // CHECK-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]]
-// CHECK-64: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]],
-// CHECK-32: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_ADDR]],
+// CHECK: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]],
 // CHECK: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
 // CHECK: br
 // CHECK: [[CASE2]]:
 // CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
-// CHECK-64-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
-// CHECK-32-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_ADDR]], {{.+}} [[SIVAR_PRIV_VAL]]
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
 // CHECK: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
 // CHECK: br


 // CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
 // CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1,
-// CHECK: call void @[[TOFFL1:.+]]({{.+}})
+// CHECK: call void @[[TOFFL1:.+]]({{.+}}* {{.+}})
 // CHECK:  ret

-// CHECK: define{{.*}} void @[[TOFFL1]](i{{64|32}} [[TVAR_ARG:%.+]])
-// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}},
-// CHECK: [[TVAR_CASTED_ADDR:%.+]] = alloca i{{.+}},
+// CHECK: define{{.*}} void @[[TOFFL1]](i32*{{.+}} [[TVAR_ARG:%.+]])
+// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}}*,
 // CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]],
-// CHECK-64: [[TVAR_CONV:%.+]] = bitcast{{.+}} [[TVAR_ADDR]] to
-// CHECK-64: [[TVAR:%.+]] = load i64, i64* [[TVAR_CASTED_ADDR]],
-// CHECK-32: [[TVAR:%.+]] = load i32, i32* [[TVAR_CASTED_ADDR]],
+// CHECK: [[TVAR:%.+]] = load i32*, i32** [[TVAR_ADDR]],
 // CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[TOUTL1:.+]] to {{.+}}, {{.+}} [[TVAR]])
 // CHECK: ret void

-// CHECK: define internal void @[[TOUTL1]]({{.+}}, {{.+}}, {{.+}} [[TVAR_ARG:%.+]])
+// CHECK: define internal void @[[TOUTL1]]({{.+}}, {{.+}}, {{.+}}*{{.+}} [[TVAR_ARG:%.+]])
 // Skip global and bound tid vars
 // CHECK: {{.+}} = alloca i32*,
 // CHECK: {{.+}} = alloca i32*,
-// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}},
+// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}}*,
 // CHECK: [[TVAR_PRIV:%.+]] = alloca i{{.+}},
 // CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
 // CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]],
-// CHECK-64: [[TVAR_REF:%.+]] = bitcast i64* [[TVAR_ADDR]] to i32*
+// CHECK: [[TVAR_REF:%.+]] = load i32*, i32** [[TVAR_ADDR]],
 // CHECK: store{{.+}} 0, {{.+}} [[TVAR_PRIV]],

 // CHECK: call void @__kmpc_for_static_init_4(
@ -206,18 +196,15 @@ int main() {
 // CHECK: {{.+}}, label %[[CASE2:.+]]
 // CHECK: ]
 // CHECK: [[CASE1]]:
-// CHECK-64-DAG: [[TVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_REF]],
-// CHECK-32-DAG: [[TVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_ADDR]],
+// CHECK-DAG: [[TVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_REF]],
 // CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
 // CHECK-DAG: [[TVAR_INC:%.+]] = add{{.+}} [[TVAR_VAL]], [[TVAR_PRIV_VAL]]
-// CHECK-64: store{{.+}} [[TVAR_INC]], {{.+}} [[TVAR_REF]],
-// CHECK-32: store{{.+}} [[TVAR_INC]], {{.+}} [[TVAR_ADDR]],
+// CHECK: store{{.+}} [[TVAR_INC]], {{.+}} [[TVAR_REF]],
 // CHECK: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
 // CHECK: br
 // CHECK: [[CASE2]]:
 // CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
-// CHECK-64-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]]
-// CHECK-32-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_ADDR]], {{.+}} [[TVAR_PRIV_VAL]]
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]]
 // CHECK: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
 // CHECK: br