[OPENMP] Treat local variables in CUDA mode as thread local.

In CUDA mode all local variables are actually thread
local|threadprivate, not private, and, thus, they cannot be shared
between threads|lanes.

llvm-svn: 326590
This commit is contained in:
Alexey Bataev 2018-03-02 17:17:12 +00:00
parent b9699c009d
commit 852525de25
3 changed files with 168 additions and 16 deletions

View File

@ -1427,7 +1427,7 @@ def fopenmp_simd : Flag<["-"], "fopenmp-simd">, Group<f_Group>, Flags<[CC1Option
HelpText<"Emit OpenMP code only for SIMD-based constructs.">;
def fno_openmp_simd : Flag<["-"], "fno-openmp-simd">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>;
def fopenmp_cuda_mode : Flag<["-"], "fopenmp-cuda-mode">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>;
def fno_openmp_cuda_mode : Flag<["-"], "fno-openmp-cuda-mode">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>;
def fno_openmp_cuda_mode : Flag<["-"], "fno-openmp-cuda-mode">, Group<f_Group>, Flags<[NoArgumentUnused]>;
def fno_optimize_sibling_calls : Flag<["-"], "fno-optimize-sibling-calls">, Group<f_Group>;
def foptimize_sibling_calls : Flag<["-"], "foptimize-sibling-calls">, Group<f_Group>;
def fno_escaping_block_tail_calls : Flag<["-"], "fno-escaping-block-tail-calls">, Group<f_Group>, Flags<[CC1Option]>;

View File

@ -936,10 +936,11 @@ DSAStackTy::getTopMostTaskgroupReductionData(ValueDecl *D, SourceRange &SR,
bool DSAStackTy::isOpenMPLocal(VarDecl *D, StackTy::reverse_iterator Iter) {
D = D->getCanonicalDecl();
if (!isStackEmpty() && Stack.back().first.size() > 1) {
if (!isStackEmpty()) {
reverse_iterator I = Iter, E = Stack.back().first.rend();
Scope *TopScope = nullptr;
while (I != E && !isParallelOrTaskRegion(I->Directive))
while (I != E && !isParallelOrTaskRegion(I->Directive) &&
!isOpenMPTargetExecutionDirective(I->Directive))
++I;
if (I == E)
return false;
@ -956,20 +957,7 @@ DSAStackTy::DSAVarData DSAStackTy::getTopDSA(ValueDecl *D, bool FromParent) {
D = getCanonicalDecl(D);
DSAVarData DVar;
// OpenMP [2.9.1.1, Data-sharing Attribute Rules for Variables Referenced
// in a Construct, C/C++, predetermined, p.1]
// Variables appearing in threadprivate directives are threadprivate.
auto *VD = dyn_cast<VarDecl>(D);
if ((VD && VD->getTLSKind() != VarDecl::TLS_None &&
!(VD->hasAttr<OMPThreadPrivateDeclAttr>() &&
SemaRef.getLangOpts().OpenMPUseTLS &&
SemaRef.getASTContext().getTargetInfo().isTLSSupported())) ||
(VD && VD->getStorageClass() == SC_Register &&
VD->hasAttr<AsmLabelAttr>() && !VD->isLocalVarDecl())) {
addDSA(D, buildDeclRefExpr(SemaRef, VD, D->getType().getNonReferenceType(),
D->getLocation()),
OMPC_threadprivate);
}
auto TI = Threadprivates.find(D);
if (TI != Threadprivates.end()) {
DVar.RefExpr = TI->getSecond().RefExpr.getPointer();
@ -981,6 +969,62 @@ DSAStackTy::DSAVarData DSAStackTy::getTopDSA(ValueDecl *D, bool FromParent) {
VD->getAttr<OMPThreadPrivateDeclAttr>()->getLocation());
DVar.CKind = OMPC_threadprivate;
addDSA(D, DVar.RefExpr, OMPC_threadprivate);
return DVar;
}
// OpenMP [2.9.1.1, Data-sharing Attribute Rules for Variables Referenced
// in a Construct, C/C++, predetermined, p.1]
// Variables appearing in threadprivate directives are threadprivate.
if ((VD && VD->getTLSKind() != VarDecl::TLS_None &&
!(VD->hasAttr<OMPThreadPrivateDeclAttr>() &&
SemaRef.getLangOpts().OpenMPUseTLS &&
SemaRef.getASTContext().getTargetInfo().isTLSSupported())) ||
(VD && VD->getStorageClass() == SC_Register &&
VD->hasAttr<AsmLabelAttr>() && !VD->isLocalVarDecl())) {
DVar.RefExpr = buildDeclRefExpr(
SemaRef, VD, D->getType().getNonReferenceType(), D->getLocation());
DVar.CKind = OMPC_threadprivate;
addDSA(D, DVar.RefExpr, OMPC_threadprivate);
return DVar;
}
if (SemaRef.getLangOpts().OpenMPCUDAMode && VD &&
VD->isLocalVarDeclOrParm() && !isStackEmpty() &&
!isLoopControlVariable(D).first) {
auto IterTarget =
std::find_if(Stack.back().first.rbegin(), Stack.back().first.rend(),
[](const SharingMapTy &Data) {
return isOpenMPTargetExecutionDirective(Data.Directive);
});
if (IterTarget != Stack.back().first.rend()) {
auto ParentIterTarget = std::next(IterTarget, 1);
auto Iter = Stack.back().first.rbegin();
while (Iter != ParentIterTarget) {
if (isOpenMPLocal(VD, Iter)) {
DVar.RefExpr =
buildDeclRefExpr(SemaRef, VD, D->getType().getNonReferenceType(),
D->getLocation());
DVar.CKind = OMPC_threadprivate;
return DVar;
}
std::advance(Iter, 1);
}
if (!isClauseParsingMode() || IterTarget != Stack.back().first.rbegin()) {
auto DSAIter = IterTarget->SharingMap.find(D);
if (DSAIter != IterTarget->SharingMap.end() &&
isOpenMPPrivate(DSAIter->getSecond().Attributes)) {
DVar.RefExpr = DSAIter->getSecond().RefExpr.getPointer();
DVar.CKind = OMPC_threadprivate;
return DVar;
} else if (!SemaRef.IsOpenMPCapturedByRef(
D, std::distance(ParentIterTarget,
Stack.back().first.rend()))) {
DVar.RefExpr =
buildDeclRefExpr(SemaRef, VD, D->getType().getNonReferenceType(),
IterTarget->ConstructLoc);
DVar.CKind = OMPC_threadprivate;
return DVar;
}
}
}
}
if (isStackEmpty())

View File

@ -0,0 +1,108 @@
// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-cuda-mode -fopenmp-host-ir-file-path %t-ppc-host.bc -o -
template <typename tx, typename ty>
struct TT {
tx X;
ty Y;
};
int foo(int n, double *ptr) {
int a = 0;
short aa = 0;
float b[10];
double c[5][10];
TT<long long, char> d;
#pragma omp target firstprivate(a) map(tofrom: b) // expected-note 2 {{defined as threadprivate or thread local}}
{
int c; // expected-note {{defined as threadprivate or thread local}}
#pragma omp parallel shared(a, b, c, aa) // expected-error 3 {{threadprivate or thread local variable cannot be shared}}
b[a] = a;
#pragma omp parallel for
for (int i = 0; i < 10; ++i) // expected-note {{defined as threadprivate or thread local}}
#pragma omp parallel shared(i) // expected-error {{threadprivate or thread local variable cannot be shared}}
++i;
}
#pragma omp target map(aa, b, c, d)
{
int e; // expected-note {{defined as threadprivate or thread local}}
#pragma omp parallel private(b, e) // expected-error {{threadprivate or thread local variable cannot be private}}
{
aa += 1;
b[2] = 1.0;
c[1][2] = 1.0;
d.X = 1;
d.Y = 1;
}
}
#pragma omp target private(ptr)
{
ptr[0]++;
}
return a;
}
template <typename tx>
tx ftemplate(int n) {
tx a = 0;
tx b[10];
#pragma omp target reduction(+ \
: a, b) // expected-note {{defined as threadprivate or thread local}}
{
int e; // expected-note {{defined as threadprivate or thread local}}
#pragma omp parallel shared(a, e) // expected-error 2 {{threadprivate or thread local variable cannot be shared}}
a += 1;
b[2] += 1;
}
return a;
}
static int fstatic(int n) {
int a = 0;
char aaa = 0;
int b[10];
#pragma omp target firstprivate(a, aaa, b)
{
a += 1;
aaa += 1;
b[2] += 1;
}
return a;
}
struct S1 {
double a;
int r1(int n) {
int b = n + 1;
#pragma omp target firstprivate(b) // expected-note {{defined as threadprivate or thread local}}
{
int c; // expected-note {{defined as threadprivate or thread local}}
#pragma omp parallel shared(b, c) // expected-error 2 {{threadprivate or thread local variable cannot be shared}}
this->a = (double)b + 1.5;
}
return (int)b;
}
};
int bar(int n, double *ptr) {
int a = 0;
a += foo(n, ptr);
S1 S;
a += S.r1(n);
a += fstatic(n);
a += ftemplate<int>(n); // expected-note {{in instantiation of function template specialization 'ftemplate<int>' requested here}}
return a;
}