forked from OSchip/llvm-project
[OPENMP] Treat local variables in CUDA mode as thread local.
In CUDA mode all local variables are actually thread local|threadprivate, not private, and, thus, they cannot be shared between threads|lanes. llvm-svn: 326590
This commit is contained in:
parent
b9699c009d
commit
852525de25
|
@ -1427,7 +1427,7 @@ def fopenmp_simd : Flag<["-"], "fopenmp-simd">, Group<f_Group>, Flags<[CC1Option
|
|||
HelpText<"Emit OpenMP code only for SIMD-based constructs.">;
|
||||
def fno_openmp_simd : Flag<["-"], "fno-openmp-simd">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>;
|
||||
def fopenmp_cuda_mode : Flag<["-"], "fopenmp-cuda-mode">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>;
|
||||
def fno_openmp_cuda_mode : Flag<["-"], "fno-openmp-cuda-mode">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>;
|
||||
def fno_openmp_cuda_mode : Flag<["-"], "fno-openmp-cuda-mode">, Group<f_Group>, Flags<[NoArgumentUnused]>;
|
||||
def fno_optimize_sibling_calls : Flag<["-"], "fno-optimize-sibling-calls">, Group<f_Group>;
|
||||
def foptimize_sibling_calls : Flag<["-"], "foptimize-sibling-calls">, Group<f_Group>;
|
||||
def fno_escaping_block_tail_calls : Flag<["-"], "fno-escaping-block-tail-calls">, Group<f_Group>, Flags<[CC1Option]>;
|
||||
|
|
|
@ -936,10 +936,11 @@ DSAStackTy::getTopMostTaskgroupReductionData(ValueDecl *D, SourceRange &SR,
|
|||
|
||||
bool DSAStackTy::isOpenMPLocal(VarDecl *D, StackTy::reverse_iterator Iter) {
|
||||
D = D->getCanonicalDecl();
|
||||
if (!isStackEmpty() && Stack.back().first.size() > 1) {
|
||||
if (!isStackEmpty()) {
|
||||
reverse_iterator I = Iter, E = Stack.back().first.rend();
|
||||
Scope *TopScope = nullptr;
|
||||
while (I != E && !isParallelOrTaskRegion(I->Directive))
|
||||
while (I != E && !isParallelOrTaskRegion(I->Directive) &&
|
||||
!isOpenMPTargetExecutionDirective(I->Directive))
|
||||
++I;
|
||||
if (I == E)
|
||||
return false;
|
||||
|
@ -956,20 +957,7 @@ DSAStackTy::DSAVarData DSAStackTy::getTopDSA(ValueDecl *D, bool FromParent) {
|
|||
D = getCanonicalDecl(D);
|
||||
DSAVarData DVar;
|
||||
|
||||
// OpenMP [2.9.1.1, Data-sharing Attribute Rules for Variables Referenced
|
||||
// in a Construct, C/C++, predetermined, p.1]
|
||||
// Variables appearing in threadprivate directives are threadprivate.
|
||||
auto *VD = dyn_cast<VarDecl>(D);
|
||||
if ((VD && VD->getTLSKind() != VarDecl::TLS_None &&
|
||||
!(VD->hasAttr<OMPThreadPrivateDeclAttr>() &&
|
||||
SemaRef.getLangOpts().OpenMPUseTLS &&
|
||||
SemaRef.getASTContext().getTargetInfo().isTLSSupported())) ||
|
||||
(VD && VD->getStorageClass() == SC_Register &&
|
||||
VD->hasAttr<AsmLabelAttr>() && !VD->isLocalVarDecl())) {
|
||||
addDSA(D, buildDeclRefExpr(SemaRef, VD, D->getType().getNonReferenceType(),
|
||||
D->getLocation()),
|
||||
OMPC_threadprivate);
|
||||
}
|
||||
auto TI = Threadprivates.find(D);
|
||||
if (TI != Threadprivates.end()) {
|
||||
DVar.RefExpr = TI->getSecond().RefExpr.getPointer();
|
||||
|
@ -981,6 +969,62 @@ DSAStackTy::DSAVarData DSAStackTy::getTopDSA(ValueDecl *D, bool FromParent) {
|
|||
VD->getAttr<OMPThreadPrivateDeclAttr>()->getLocation());
|
||||
DVar.CKind = OMPC_threadprivate;
|
||||
addDSA(D, DVar.RefExpr, OMPC_threadprivate);
|
||||
return DVar;
|
||||
}
|
||||
// OpenMP [2.9.1.1, Data-sharing Attribute Rules for Variables Referenced
|
||||
// in a Construct, C/C++, predetermined, p.1]
|
||||
// Variables appearing in threadprivate directives are threadprivate.
|
||||
if ((VD && VD->getTLSKind() != VarDecl::TLS_None &&
|
||||
!(VD->hasAttr<OMPThreadPrivateDeclAttr>() &&
|
||||
SemaRef.getLangOpts().OpenMPUseTLS &&
|
||||
SemaRef.getASTContext().getTargetInfo().isTLSSupported())) ||
|
||||
(VD && VD->getStorageClass() == SC_Register &&
|
||||
VD->hasAttr<AsmLabelAttr>() && !VD->isLocalVarDecl())) {
|
||||
DVar.RefExpr = buildDeclRefExpr(
|
||||
SemaRef, VD, D->getType().getNonReferenceType(), D->getLocation());
|
||||
DVar.CKind = OMPC_threadprivate;
|
||||
addDSA(D, DVar.RefExpr, OMPC_threadprivate);
|
||||
return DVar;
|
||||
}
|
||||
if (SemaRef.getLangOpts().OpenMPCUDAMode && VD &&
|
||||
VD->isLocalVarDeclOrParm() && !isStackEmpty() &&
|
||||
!isLoopControlVariable(D).first) {
|
||||
auto IterTarget =
|
||||
std::find_if(Stack.back().first.rbegin(), Stack.back().first.rend(),
|
||||
[](const SharingMapTy &Data) {
|
||||
return isOpenMPTargetExecutionDirective(Data.Directive);
|
||||
});
|
||||
if (IterTarget != Stack.back().first.rend()) {
|
||||
auto ParentIterTarget = std::next(IterTarget, 1);
|
||||
auto Iter = Stack.back().first.rbegin();
|
||||
while (Iter != ParentIterTarget) {
|
||||
if (isOpenMPLocal(VD, Iter)) {
|
||||
DVar.RefExpr =
|
||||
buildDeclRefExpr(SemaRef, VD, D->getType().getNonReferenceType(),
|
||||
D->getLocation());
|
||||
DVar.CKind = OMPC_threadprivate;
|
||||
return DVar;
|
||||
}
|
||||
std::advance(Iter, 1);
|
||||
}
|
||||
if (!isClauseParsingMode() || IterTarget != Stack.back().first.rbegin()) {
|
||||
auto DSAIter = IterTarget->SharingMap.find(D);
|
||||
if (DSAIter != IterTarget->SharingMap.end() &&
|
||||
isOpenMPPrivate(DSAIter->getSecond().Attributes)) {
|
||||
DVar.RefExpr = DSAIter->getSecond().RefExpr.getPointer();
|
||||
DVar.CKind = OMPC_threadprivate;
|
||||
return DVar;
|
||||
} else if (!SemaRef.IsOpenMPCapturedByRef(
|
||||
D, std::distance(ParentIterTarget,
|
||||
Stack.back().first.rend()))) {
|
||||
DVar.RefExpr =
|
||||
buildDeclRefExpr(SemaRef, VD, D->getType().getNonReferenceType(),
|
||||
IterTarget->ConstructLoc);
|
||||
DVar.CKind = OMPC_threadprivate;
|
||||
return DVar;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (isStackEmpty())
|
||||
|
|
|
@ -0,0 +1,108 @@
|
|||
// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
|
||||
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-cuda-mode -fopenmp-host-ir-file-path %t-ppc-host.bc -o -
|
||||
|
||||
template <typename tx, typename ty>
|
||||
struct TT {
|
||||
tx X;
|
||||
ty Y;
|
||||
};
|
||||
|
||||
int foo(int n, double *ptr) {
|
||||
int a = 0;
|
||||
short aa = 0;
|
||||
float b[10];
|
||||
double c[5][10];
|
||||
TT<long long, char> d;
|
||||
|
||||
#pragma omp target firstprivate(a) map(tofrom: b) // expected-note 2 {{defined as threadprivate or thread local}}
|
||||
{
|
||||
int c; // expected-note {{defined as threadprivate or thread local}}
|
||||
#pragma omp parallel shared(a, b, c, aa) // expected-error 3 {{threadprivate or thread local variable cannot be shared}}
|
||||
b[a] = a;
|
||||
#pragma omp parallel for
|
||||
for (int i = 0; i < 10; ++i) // expected-note {{defined as threadprivate or thread local}}
|
||||
#pragma omp parallel shared(i) // expected-error {{threadprivate or thread local variable cannot be shared}}
|
||||
++i;
|
||||
}
|
||||
|
||||
#pragma omp target map(aa, b, c, d)
|
||||
{
|
||||
int e; // expected-note {{defined as threadprivate or thread local}}
|
||||
#pragma omp parallel private(b, e) // expected-error {{threadprivate or thread local variable cannot be private}}
|
||||
{
|
||||
aa += 1;
|
||||
b[2] = 1.0;
|
||||
c[1][2] = 1.0;
|
||||
d.X = 1;
|
||||
d.Y = 1;
|
||||
}
|
||||
}
|
||||
|
||||
#pragma omp target private(ptr)
|
||||
{
|
||||
ptr[0]++;
|
||||
}
|
||||
|
||||
return a;
|
||||
}
|
||||
|
||||
template <typename tx>
|
||||
tx ftemplate(int n) {
|
||||
tx a = 0;
|
||||
tx b[10];
|
||||
|
||||
#pragma omp target reduction(+ \
|
||||
: a, b) // expected-note {{defined as threadprivate or thread local}}
|
||||
{
|
||||
int e; // expected-note {{defined as threadprivate or thread local}}
|
||||
#pragma omp parallel shared(a, e) // expected-error 2 {{threadprivate or thread local variable cannot be shared}}
|
||||
a += 1;
|
||||
b[2] += 1;
|
||||
}
|
||||
|
||||
return a;
|
||||
}
|
||||
|
||||
static int fstatic(int n) {
|
||||
int a = 0;
|
||||
char aaa = 0;
|
||||
int b[10];
|
||||
|
||||
#pragma omp target firstprivate(a, aaa, b)
|
||||
{
|
||||
a += 1;
|
||||
aaa += 1;
|
||||
b[2] += 1;
|
||||
}
|
||||
|
||||
return a;
|
||||
}
|
||||
|
||||
struct S1 {
|
||||
double a;
|
||||
|
||||
int r1(int n) {
|
||||
int b = n + 1;
|
||||
|
||||
#pragma omp target firstprivate(b) // expected-note {{defined as threadprivate or thread local}}
|
||||
{
|
||||
int c; // expected-note {{defined as threadprivate or thread local}}
|
||||
#pragma omp parallel shared(b, c) // expected-error 2 {{threadprivate or thread local variable cannot be shared}}
|
||||
this->a = (double)b + 1.5;
|
||||
}
|
||||
|
||||
return (int)b;
|
||||
}
|
||||
};
|
||||
|
||||
int bar(int n, double *ptr) {
|
||||
int a = 0;
|
||||
a += foo(n, ptr);
|
||||
S1 S;
|
||||
a += S.r1(n);
|
||||
a += fstatic(n);
|
||||
a += ftemplate<int>(n); // expected-note {{in instantiation of function template specialization 'ftemplate<int>' requested here}}
|
||||
|
||||
return a;
|
||||
}
|
||||
|
Loading…
Reference in New Issue