[OPENMP]Reduce number of captured global vars.

Try to reduce the number of global vars captured in the OpenMP regions
by capturing them only the regions, which mark them as not-shared.
This commit is contained in:
Alexey Bataev 2020-03-12 12:52:02 -04:00
parent 6bbc1737e0
commit 172f1460ae
8 changed files with 62 additions and 12 deletions

View File

@ -9858,6 +9858,13 @@ public:
bool isOpenMPTargetCapturedDecl(const ValueDecl *D, unsigned Level,
unsigned CaptureLevel) const;
/// Check if the specified global variable must be captured by outer capture
/// regions.
/// \param Level Relative level of nested OpenMP construct for that
/// the check is performed.
bool isOpenMPGlobalCapturedDecl(ValueDecl *D, unsigned Level,
unsigned CaptureLevel) const;
ExprResult PerformOpenMPImplicitIntegerConversion(SourceLocation OpLoc,
Expr *Op);
/// Called on start of new data sharing attribute block.

View File

@ -57,7 +57,8 @@ class OMPLexicalScope : public CodeGenFunction::LexicalScope {
static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
return CGF.LambdaCaptureFields.lookup(VD) ||
(CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
(CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
(CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
}
public:
@ -5551,7 +5552,11 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
// Emit outlined function for task construct.
const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
Address CapturedStruct = Address::invalid();
{
OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
CapturedStruct = GenerateCapturedStmtArgument(*CS);
}
QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
const Expr *IfCond = nullptr;
for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {

View File

@ -16523,13 +16523,19 @@ bool Sema::tryCaptureVariable(
!IsOpenMPPrivateDecl &&
isOpenMPTargetCapturedDecl(Var, RSI->OpenMPLevel,
RSI->OpenMPCaptureLevel);
// Do not capture global if it is not privatized in outer regions.
bool IsGlobalCap =
IsGlobal && isOpenMPGlobalCapturedDecl(Var, RSI->OpenMPLevel,
RSI->OpenMPCaptureLevel);
// When we detect target captures we are looking from inside the
// target region, therefore we need to propagate the capture from the
// enclosing region. Therefore, the capture is not initially nested.
if (IsTargetCap)
adjustOpenMPTargetScopeIndex(FunctionScopesIndex, RSI->OpenMPLevel);
if (IsTargetCap || IsOpenMPPrivateDecl) {
if (IsTargetCap || IsOpenMPPrivateDecl ||
(IsGlobal && !IsGlobalCap)) {
Nested = !IsTargetCap;
DeclRefType = DeclRefType.getUnqualifiedType();
CaptureType = Context.getLValueReferenceType(DeclRefType);

View File

@ -498,6 +498,8 @@ public:
const DSAVarData getTopDSA(ValueDecl *D, bool FromParent);
/// Returns data-sharing attributes for the specified declaration.
const DSAVarData getImplicitDSA(ValueDecl *D, bool FromParent) const;
/// Returns data-sharing attributes for the specified declaration.
const DSAVarData getImplicitDSA(ValueDecl *D, unsigned Level) const;
/// Checks if the specified variables has data-sharing attributes which
/// match specified \a CPred predicate in any directive which matches \a DPred
/// predicate.
@ -1552,6 +1554,15 @@ const DSAStackTy::DSAVarData DSAStackTy::getImplicitDSA(ValueDecl *D,
return getDSA(StartI, D);
}
const DSAStackTy::DSAVarData DSAStackTy::getImplicitDSA(ValueDecl *D,
unsigned Level) const {
if (getStackSize() <= Level)
return DSAVarData();
D = getCanonicalDecl(D);
const_iterator StartI = std::next(begin(), getStackSize() - 1 - Level);
return getDSA(StartI, D);
}
const DSAStackTy::DSAVarData
DSAStackTy::hasDSA(ValueDecl *D,
const llvm::function_ref<bool(OpenMPClauseKind)> CPred,
@ -2108,9 +2119,7 @@ VarDecl *Sema::isOpenMPCapturedDecl(ValueDecl *D, bool CheckScopeInfo,
void Sema::adjustOpenMPTargetScopeIndex(unsigned &FunctionScopesIndex,
unsigned Level) const {
SmallVector<OpenMPDirectiveKind, 4> Regions;
getOpenMPCaptureRegions(Regions, DSAStack->getDirective(Level));
FunctionScopesIndex -= Regions.size();
FunctionScopesIndex -= getOpenMPCaptureLevels(DSAStack->getDirective(Level));
}
void Sema::startOpenMPLoop() {
@ -2213,6 +2222,29 @@ bool Sema::isOpenMPTargetCapturedDecl(const ValueDecl *D, unsigned Level,
Regions[CaptureLevel] != OMPD_task;
}
bool Sema::isOpenMPGlobalCapturedDecl(ValueDecl *D, unsigned Level,
unsigned CaptureLevel) const {
assert(LangOpts.OpenMP && "OpenMP is not allowed");
// Return true if the current level is no longer enclosed in a target region.
if (const auto *VD = dyn_cast<VarDecl>(D)) {
if (!VD->hasLocalStorage()) {
DSAStackTy::DSAVarData TopDVar =
DSAStack->getTopDSA(D, /*FromParent=*/false);
unsigned NumLevels =
getOpenMPCaptureLevels(DSAStack->getDirective(Level));
if (Level == 0)
return (NumLevels == CaptureLevel + 1) && TopDVar.CKind != OMPC_shared;
DSAStackTy::DSAVarData DVar = DSAStack->getImplicitDSA(D, Level - 1);
return DVar.CKind != OMPC_shared ||
isOpenMPGlobalCapturedDecl(
D, Level - 1,
getOpenMPCaptureLevels(DSAStack->getDirective(Level - 1)) - 1);
}
}
return true;
}
void Sema::DestroyDataSharingAttributesStack() { delete DSAStack; }
void Sema::finalizeOpenMPDelayedAnalysis() {
@ -3575,7 +3607,7 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
};
// Start a captured region for 'parallel'.
ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
ParamsParallel, /*OpenMPCaptureLevel=*/1);
ParamsParallel, /*OpenMPCaptureLevel=*/0);
QualType Args[] = {VoidPtrTy};
FunctionProtoType::ExtProtoInfo EPI;
EPI.Variadic = true;
@ -3596,7 +3628,7 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
std::make_pair(StringRef(), QualType()) // __context with shared vars
};
ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
Params, /*OpenMPCaptureLevel=*/2);
Params, /*OpenMPCaptureLevel=*/1);
// Mark this captured region as inlined, because we don't use outlined
// function directly.
getCurCapturedRegion()->TheCapturedDecl->addAttr(

View File

@ -206,7 +206,7 @@ int main() {
// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 4
// CHECK: store [[S_DOUBLE_TY]]* %{{.+}}, [[S_DOUBLE_TY]]** [[VAR_REF]],
// CHECK: [[SIVAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 5
// CHECK: [[SIVAR_VAL:%.+]] = load i32, i32* %{{.+}},
// CHECK: [[SIVAR_VAL:%.+]] = load i32, i32* @{{.+}},
// CHECK: store i{{[0-9]+}} [[SIVAR_VAL]], i{{[0-9]+}}* [[SIVAR_REF]],
// Allocate task.

View File

@ -187,7 +187,7 @@ int main() {
// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 3
// CHECK: store [[S_DOUBLE_TY]]* [[VAR_ADDR:%.+]], [[S_DOUBLE_TY]]** [[VAR_REF]],
// CHECK: [[SIVAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 4
// CHECK: store i{{[0-9]+}}* [[SIVAR:%.+]], i{{[0-9]+}}** [[SIVAR_REF]],
// CHECK: store i{{[0-9]+}}* [[SIVAR:@.+]], i{{[0-9]+}}** [[SIVAR_REF]],
// Allocate task.
// Returns struct kmp_task_t {

View File

@ -206,7 +206,7 @@ int main() {
// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 4
// CHECK: store [[S_DOUBLE_TY]]* %{{.+}}, [[S_DOUBLE_TY]]** [[VAR_REF]],
// CHECK: [[SIVAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 5
// CHECK: [[SIVAR_VAL:%.+]] = load i32, i32* %{{.+}},
// CHECK: [[SIVAR_VAL:%.+]] = load i32, i32* @{{.+}},
// CHECK: store i{{[0-9]+}} [[SIVAR_VAL]], i{{[0-9]+}}* [[SIVAR_REF]],
// Allocate task.

View File

@ -187,7 +187,7 @@ int main() {
// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 3
// CHECK: store [[S_DOUBLE_TY]]* [[VAR_ADDR:%.+]], [[S_DOUBLE_TY]]** [[VAR_REF]],
// CHECK: [[SIVAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 4
// CHECK: store i{{[0-9]+}}* [[SIVAR:%.+]], i{{[0-9]+}}** [[SIVAR_REF]],
// CHECK: store i{{[0-9]+}}* [[SIVAR:@.+]], i{{[0-9]+}}** [[SIVAR_REF]],
// Allocate task.
// Returns struct kmp_task_t {