[OpenMP] Capture global variables in target regions.

All global variables that are not enclosed in a declare target region 
must be captured in the target region as local variables do. Currently, 
there is no support for declare target, so this patch adds support for 
capturing all the global variables used in a the target region.

llvm-svn: 249154
This commit is contained in:
Samuel Antao 2015-10-02 17:14:03 +00:00
parent 1f385618c0
commit 4be30e92dc
6 changed files with 251 additions and 3 deletions

View File

@ -109,6 +109,12 @@ bool isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind);
/// parallel', otherwise - false.
bool isOpenMPParallelDirective(OpenMPDirectiveKind DKind);
/// \brief Checks if the specified directive is a target-kind directive.
/// \param DKind Specified directive.
/// \return true - the directive is a target-like directive like 'omp target',
/// otherwise - false.
bool isOpenMPTargetDirective(OpenMPDirectiveKind DKind);
/// \brief Checks if the specified directive is a teams-kind directive.
/// \param DKind Specified directive.
/// \return true - the directive is a teams-like directive like 'omp teams',

View File

@ -7728,6 +7728,11 @@ public:
/// is performed.
bool isOpenMPPrivateVar(VarDecl *VD, unsigned Level);
/// \brief Check if the specified variable is captured by 'target' directive.
/// \param Level Relative level of nested OpenMP construct for that the check
/// is performed.
bool isOpenMPTargetCapturedVar(VarDecl *VD, unsigned Level);
ExprResult PerformOpenMPImplicitIntegerConversion(SourceLocation OpLoc,
Expr *Op);
/// \brief Called on start of new data sharing attribute block.

View File

@ -424,6 +424,10 @@ bool clang::isOpenMPParallelDirective(OpenMPDirectiveKind DKind) {
DKind == OMPD_parallel_sections; // TODO add next directives.
}
bool clang::isOpenMPTargetDirective(OpenMPDirectiveKind DKind) {
return DKind == OMPD_target; // TODO add next directives.
}
bool clang::isOpenMPTeamsDirective(OpenMPDirectiveKind DKind) {
return DKind == OMPD_teams; // TODO add next directives.
}

View File

@ -13183,10 +13183,18 @@ bool Sema::tryCaptureVariable(
if (getLangOpts().OpenMP) {
if (auto *RSI = dyn_cast<CapturedRegionScopeInfo>(CSI)) {
// OpenMP private variables should not be captured in outer scope, so
// just break here.
// just break here. Similarly, global variables that are captured in a
// target region should not be captured outside the scope of the region.
if (RSI->CapRegionKind == CR_OpenMP) {
if (isOpenMPPrivateVar(Var, OpenMPLevel)) {
Nested = true;
auto isTargetCap = isOpenMPTargetCapturedVar(Var, OpenMPLevel);
// When we detect target captures we are looking from inside the
// target region, therefore we need to propagate the capture from the
// enclosing region. Therefore, the capture is not initially nested.
if (isTargetCap)
FunctionScopesIndex--;
if (isTargetCap || isOpenMPPrivateVar(Var, OpenMPLevel)) {
Nested = !isTargetCap;
DeclRefType = DeclRefType.getUnqualifiedType();
CaptureType = Context.getLValueReferenceType(DeclRefType);
break;

View File

@ -194,6 +194,13 @@ public:
bool hasExplicitDSA(VarDecl *D,
const llvm::function_ref<bool(OpenMPClauseKind)> &CPred,
unsigned Level);
/// \brief Returns true if the directive at level \Level matches in the
/// specified \a DPred predicate.
bool hasExplicitDirective(
const llvm::function_ref<bool(OpenMPDirectiveKind)> &DPred,
unsigned Level);
/// \brief Finds a directive which matches specified \a DPred predicate.
template <class NamedDirectivesPredicate>
bool hasDirective(NamedDirectivesPredicate DPred, bool FromParent);
@ -661,6 +668,19 @@ bool DSAStackTy::hasExplicitDSA(
CPred(StartI->SharingMap[D].Attributes);
}
bool DSAStackTy::hasExplicitDirective(
const llvm::function_ref<bool(OpenMPDirectiveKind)> &DPred,
unsigned Level) {
if (isClauseParsingMode())
++Level;
auto StartI = Stack.rbegin();
auto EndI = std::prev(Stack.rend());
if (std::distance(StartI, EndI) <= (int)Level)
return false;
std::advance(StartI, Level);
return DPred(StartI->Directive);
}
template <class NamedDirectivesPredicate>
bool DSAStackTy::hasDirective(NamedDirectivesPredicate DPred, bool FromParent) {
auto StartI = std::next(Stack.rbegin());
@ -684,6 +704,30 @@ void Sema::InitDataSharingAttributesStack() {
bool Sema::IsOpenMPCapturedVar(VarDecl *VD) {
assert(LangOpts.OpenMP && "OpenMP is not allowed");
VD = VD->getCanonicalDecl();
// If we are attempting to capture a global variable in a directive with
// 'target' we return true so that this global is also mapped to the device.
//
// FIXME: If the declaration is enclosed in a 'declare target' directive,
// then it should not be captured. Therefore, an extra check has to be
// inserted here once support for 'declare target' is added.
//
if (!VD->hasLocalStorage()) {
if (DSAStack->getCurrentDirective() == OMPD_target &&
!DSAStack->isClauseParsingMode()) {
return true;
}
if (DSAStack->getCurScope() &&
DSAStack->hasDirective(
[](OpenMPDirectiveKind K, const DeclarationNameInfo &DNI,
SourceLocation Loc) -> bool {
return isOpenMPTargetDirective(K);
},
false)) {
return true;
}
}
if (DSAStack->getCurrentDirective() != OMPD_unknown &&
(!DSAStack->isClauseParsingMode() ||
DSAStack->getParentDirective() != OMPD_unknown)) {
@ -708,6 +752,14 @@ bool Sema::isOpenMPPrivateVar(VarDecl *VD, unsigned Level) {
VD, [](OpenMPClauseKind K) -> bool { return K == OMPC_private; }, Level);
}
bool Sema::isOpenMPTargetCapturedVar(VarDecl *VD, unsigned Level) {
assert(LangOpts.OpenMP && "OpenMP is not allowed");
// Return true if the current level is no longer enclosed in a target region.
return !VD->hasLocalStorage() &&
DSAStack->hasExplicitDirective(isOpenMPTargetDirective, Level);
}
void Sema::DestroyDataSharingAttributesStack() { delete DSAStack; }
void Sema::StartOpenMPDSABlock(OpenMPDirectiveKind DKind,

View File

@ -0,0 +1,173 @@
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
// CHECK-DAG: [[GA:@.+]] = global double 1.000000e+00
// CHECK-DAG: [[GB:@.+]] = global double 2.000000e+00
// CHECK-DAG: [[GC:@.+]] = global double 3.000000e+00
// CHECK-DAG: [[GD:@.+]] = global double 4.000000e+00
// CHECK-DAG: [[FA:@.+]] = internal global float 5.000000e+00
// CHECK-DAG: [[FB:@.+]] = internal global float 6.000000e+00
// CHECK-DAG: [[FC:@.+]] = internal global float 7.000000e+00
// CHECK-DAG: [[FD:@.+]] = internal global float 8.000000e+00
// CHECK-DAG: [[BA:@.+]] = internal global float 9.000000e+00
// CHECK-DAG: [[BB:@.+]] = internal global float 1.000000e+01
// CHECK-DAG: [[BC:@.+]] = internal global float 1.100000e+01
// CHECK-DAG: [[BD:@.+]] = internal global float 1.200000e+01
double Ga = 1.0;
double Gb = 2.0;
double Gc = 3.0;
double Gd = 4.0;
// CHECK: define {{.*}} @{{.*}}foo{{.*}}(
// CHECK-SAME: i16 {{[^,]*}}[[A:%[^,]+]],
// CHECK-SAME: i16 {{[^,]*}}[[B:%[^,]+]],
// CHECK-SAME: i16 {{[^,]*}}[[C:%[^,]+]],
// CHECK-SAME: i16 {{[^,]*}}[[D:%[^,]+]])
// CHECK: [[LA:%.+]] = alloca i16
// CHECK: [[LB:%.+]] = alloca i16
// CHECK: [[LC:%.+]] = alloca i16
// CHECK: [[LD:%.+]] = alloca i16
int foo(short a, short b, short c, short d){
static float Sa = 5.0;
static float Sb = 6.0;
static float Sc = 7.0;
static float Sd = 8.0;
// CHECK-DAG: [[REFB:%.+]] = bitcast i16* [[LB]] to i8*
// CHECK-DAG: store i8* [[REFB]], i8** [[GEPB:%.+]], align
// CHECK-DAG: [[REFC:%.+]] = bitcast i16* [[LC]] to i8*
// CHECK-DAG: store i8* [[REFC]], i8** [[GEPC:%.+]], align
// CHECK-DAG: [[REFD:%.+]] = bitcast i16* [[LD]] to i8*
// CHECK-DAG: store i8* [[REFD]], i8** [[GEPD:%.+]], align
// CHECK-DAG: store i8* bitcast (double* [[GB]] to i8*), i8** [[GEPGB:%.+]], align
// CHECK-DAG: store i8* bitcast (double* [[GC]] to i8*), i8** [[GEPGC:%.+]], align
// CHECK-DAG: store i8* bitcast (double* [[GD]] to i8*), i8** [[GEPGD:%.+]], align
// CHECK-DAG: store i8* bitcast (float* [[FB]] to i8*), i8** [[GEPFB:%.+]], align
// CHECK-DAG: store i8* bitcast (float* [[FC]] to i8*), i8** [[GEPFC:%.+]], align
// CHECK-DAG: store i8* bitcast (float* [[FD]] to i8*), i8** [[GEPFD:%.+]], align
// CHECK-DAG: [[GEPB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
// CHECK-DAG: [[GEPC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
// CHECK-DAG: [[GEPD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
// CHECK-DAG: [[GEPGB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
// CHECK-DAG: [[GEPGC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
// CHECK-DAG: [[GEPGD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
// CHECK-DAG: [[GEPFB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
// CHECK-DAG: [[GEPFC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
// CHECK-DAG: [[GEPFD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
// CHECK: call i32 @__tgt_target
// CHECK: call void [[OFFLOADF:@.+]](
// Capture b, Gb, Sb, Gc, c, Sc, d, Gd, Sd
#pragma omp target if(Ga>0.0 && a>0 && Sa>0.0)
{
b += 1;
Gb += 1.0;
Sb += 1.0;
// CHECK: define internal void [[OFFLOADF]]({{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}})
// The parallel region only uses 3 captures.
// CHECK: call {{.*}}@__kmpc_fork_call(%ident_t* {{.+}}, i32 {{.+}}, void (i32*, i32*, ...)* bitcast ({{.*}}[[PARF:@.+]] to {{.*}}), {{.+}}* %{{.+}}, {{.+}}* %{{.+}}, {{.+}}* %{{.+}})
// CHECK: call void @.omp_outlined.(i32* %{{.+}}, i32* %{{.+}}, {{.+}}* %{{.+}}, {{.+}}* %{{.+}}, {{.+}}* %{{.+}})
// Capture d, Gd, Sd,
// CHECK: define internal void [[PARF]](i32* noalias %{{.*}}, i32* noalias %{{.*}},
#pragma omp parallel if(Gc>0.0 && c>0 && Sc>0.0)
{
d += 1;
Gd += 1.0;
Sd += 1.0;
}
}
return a + b + c + d + (int)Sa + (int)Sb + (int)Sc + (int)Sd;
}
// CHECK: define {{.*}} @{{.*}}bar{{.*}}(
// CHECK-SAME: i16 {{[^,]*}}[[A:%[^,]+]],
// CHECK-SAME: i16 {{[^,]*}}[[B:%[^,]+]],
// CHECK-SAME: i16 {{[^,]*}}[[C:%[^,]+]],
// CHECK-SAME: i16 {{[^,]*}}[[D:%[^,]+]])
// CHECK: [[LA:%.+]] = alloca i16
// CHECK: [[LB:%.+]] = alloca i16
// CHECK: [[LC:%.+]] = alloca i16
// CHECK: [[LD:%.+]] = alloca i16
int bar(short a, short b, short c, short d){
static float Sa = 9.0;
static float Sb = 10.0;
static float Sc = 11.0;
static float Sd = 12.0;
// CHECK: call void {{.*}}@__kmpc_fork_call(%ident_t* {{.+}}, i32 {{.+}}, void (i32*, i32*, ...)* bitcast ({{.*}}[[PARF:@.+]] to {{.*}}), i16* %{{.+}}, i16* %{{.+}}, i16* %{{.+}}, i16* %{{.+}})
// CHECK: define internal void [[PARF]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, i16* dereferenceable(2) [[A:%.+]], i16* dereferenceable(2) [[B:%.+]], i16* dereferenceable(2) [[C:%.+]], i16* dereferenceable(2) [[D:%.+]])
// Capture a, b, c, d
#pragma omp parallel
{
// CHECK: [[ADRA:%.+]] = alloca i16*, align
// CHECK: [[ADRB:%.+]] = alloca i16*, align
// CHECK: [[ADRC:%.+]] = alloca i16*, align
// CHECK: [[ADRD:%.+]] = alloca i16*, align
// CHECK: store i16* [[A]], i16** [[ADRA]], align
// CHECK: store i16* [[B]], i16** [[ADRB]], align
// CHECK: store i16* [[C]], i16** [[ADRC]], align
// CHECK: store i16* [[D]], i16** [[ADRD]], align
// CHECK: [[REFA:%.+]] = load i16*, i16** [[ADRA]],
// CHECK: [[REFB:%.+]] = load i16*, i16** [[ADRB]],
// CHECK: [[REFC:%.+]] = load i16*, i16** [[ADRC]],
// CHECK: [[REFD:%.+]] = load i16*, i16** [[ADRD]],
// CHECK: load float, float* [[BA]]
// CHECK-DAG: [[CSTB:%.+]] = bitcast i16* [[REFB]] to i8*
// CHECK-DAG: [[CSTC:%.+]] = bitcast i16* [[REFC]] to i8*
// CHECK-DAG: [[CSTD:%.+]] = bitcast i16* [[REFD]] to i8*
// CHECK-DAG: store i8* [[CSTB]], i8** [[GEPB:%.+]], align
// CHECK-DAG: store i8* [[CSTC]], i8** [[GEPC:%.+]], align
// CHECK-DAG: store i8* [[CSTD]], i8** [[GEPD:%.+]], align
// CHECK-DAG: store i8* bitcast (double* [[GB]] to i8*), i8** [[GEPGB:%.+]], align
// CHECK-DAG: store i8* bitcast (double* [[GC]] to i8*), i8** [[GEPGC:%.+]], align
// CHECK-DAG: store i8* bitcast (double* [[GD]] to i8*), i8** [[GEPGD:%.+]], align
// CHECK-DAG: store i8* bitcast (float* [[BB]] to i8*), i8** [[GEPBB:%.+]], align
// CHECK-DAG: store i8* bitcast (float* [[BC]] to i8*), i8** [[GEPBC:%.+]], align
// CHECK-DAG: store i8* bitcast (float* [[BD]] to i8*), i8** [[GEPBD:%.+]], align
// CHECK-DAG: [[GEPB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
// CHECK-DAG: [[GEPC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
// CHECK-DAG: [[GEPD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
// CHECK-DAG: [[GEPGB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
// CHECK-DAG: [[GEPGC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
// CHECK-DAG: [[GEPGD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
// CHECK-DAG: [[GEPBB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
// CHECK-DAG: [[GEPBC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
// CHECK-DAG: [[GEPBD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
// CHECK: call i32 @__tgt_target
// CHECK: call void [[OFFLOADF:@.+]](
// Capture b, Gb, Sb, Gc, c, Sc, d, Gd, Sd
#pragma omp target if(Ga>0.0 && a>0 && Sa>0.0)
{
b += 1;
Gb += 1.0;
Sb += 1.0;
// CHECK: define internal void [[OFFLOADF]]({{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}})
// CHECK: call void {{.*}}@__kmpc_fork_call(%ident_t* {{.+}}, i32 {{.+}}, void (i32*, i32*, ...)* bitcast ({{.*}}[[PARF:@.+]] to {{.*}})
// CHECK: define internal void [[PARF]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, {{.+}}* dereferenceable({{.+}}) %{{.+}}, {{.+}}* dereferenceable({{.+}}) %{{.+}}, {{.+}}* dereferenceable({{.+}}) %{{.+}})
// Capture d, Gd, Sd
#pragma omp parallel if(Gc>0.0 && c>0 && Sc>0.0)
{
d += 1;
Gd += 1.0;
Sd += 1.0;
}
}
}
return a + b + c + d + (int)Sa + (int)Sb + (int)Sc + (int)Sd;
}
#endif