forked from OSchip/llvm-project
[OPENMP]Reduce number of captured global vars.
Try to reduce the number of global vars captured in the OpenMP regions by capturing them only in the regions that mark them as not-shared.
This commit is contained in:
parent
6bbc1737e0
commit
172f1460ae
|
@ -9858,6 +9858,13 @@ public:
|
|||
bool isOpenMPTargetCapturedDecl(const ValueDecl *D, unsigned Level,
|
||||
unsigned CaptureLevel) const;
|
||||
|
||||
/// Check if the specified global variable must be captured by outer capture
|
||||
/// regions.
|
||||
/// \param Level Relative level of nested OpenMP construct for which
|
||||
/// the check is performed.
/// \param CaptureLevel Capture level within the construct at \p Level
/// (presumably the index of one of its capture regions; confirm at callers).
|
||||
bool isOpenMPGlobalCapturedDecl(ValueDecl *D, unsigned Level,
|
||||
unsigned CaptureLevel) const;
|
||||
|
||||
ExprResult PerformOpenMPImplicitIntegerConversion(SourceLocation OpLoc,
|
||||
Expr *Op);
|
||||
/// Called on start of new data sharing attribute block.
|
||||
|
|
|
@ -57,7 +57,8 @@ class OMPLexicalScope : public CodeGenFunction::LexicalScope {
|
|||
static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
|
||||
return CGF.LambdaCaptureFields.lookup(VD) ||
|
||||
(CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
|
||||
(CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
|
||||
(CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
|
||||
cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
|
||||
}
|
||||
|
||||
public:
|
||||
|
@ -5551,7 +5552,11 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
|
|||
assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
|
||||
// Emit outlined function for task construct.
|
||||
const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
|
||||
Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
|
||||
Address CapturedStruct = Address::invalid();
|
||||
{
|
||||
OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
|
||||
CapturedStruct = GenerateCapturedStmtArgument(*CS);
|
||||
}
|
||||
QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
|
||||
const Expr *IfCond = nullptr;
|
||||
for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
|
||||
|
|
|
@ -16523,13 +16523,19 @@ bool Sema::tryCaptureVariable(
|
|||
!IsOpenMPPrivateDecl &&
|
||||
isOpenMPTargetCapturedDecl(Var, RSI->OpenMPLevel,
|
||||
RSI->OpenMPCaptureLevel);
|
||||
// Do not capture global if it is not privatized in outer regions.
|
||||
bool IsGlobalCap =
|
||||
IsGlobal && isOpenMPGlobalCapturedDecl(Var, RSI->OpenMPLevel,
|
||||
RSI->OpenMPCaptureLevel);
|
||||
|
||||
// When we detect target captures we are looking from inside the
|
||||
// target region, therefore we need to propagate the capture from the
|
||||
// enclosing region. Therefore, the capture is not initially nested.
|
||||
if (IsTargetCap)
|
||||
adjustOpenMPTargetScopeIndex(FunctionScopesIndex, RSI->OpenMPLevel);
|
||||
|
||||
if (IsTargetCap || IsOpenMPPrivateDecl) {
|
||||
if (IsTargetCap || IsOpenMPPrivateDecl ||
|
||||
(IsGlobal && !IsGlobalCap)) {
|
||||
Nested = !IsTargetCap;
|
||||
DeclRefType = DeclRefType.getUnqualifiedType();
|
||||
CaptureType = Context.getLValueReferenceType(DeclRefType);
|
||||
|
|
|
@ -498,6 +498,8 @@ public:
|
|||
const DSAVarData getTopDSA(ValueDecl *D, bool FromParent);
|
||||
/// Returns data-sharing attributes for the specified declaration.
|
||||
const DSAVarData getImplicitDSA(ValueDecl *D, bool FromParent) const;
|
||||
/// Returns data-sharing attributes for the specified declaration, looked up
/// starting from the stack entry selected by \a Level.
|
||||
const DSAVarData getImplicitDSA(ValueDecl *D, unsigned Level) const;
|
||||
/// Checks if the specified variable has data-sharing attributes which
|
||||
/// match specified \a CPred predicate in any directive which matches \a DPred
|
||||
/// predicate.
|
||||
|
@ -1552,6 +1554,15 @@ const DSAStackTy::DSAVarData DSAStackTy::getImplicitDSA(ValueDecl *D,
|
|||
return getDSA(StartI, D);
|
||||
}
|
||||
|
||||
/// Returns the data-sharing attributes that getDSA() computes for \p D when
/// the lookup starts at the stack entry selected by \p Level. A
/// default-constructed DSAVarData is returned when \p Level is not smaller
/// than the current stack size.
const DSAStackTy::DSAVarData DSAStackTy::getImplicitDSA(ValueDecl *D,
|
||||
unsigned Level) const {
|
||||
if (getStackSize() <= Level)
|
||||
return DSAVarData();
|
||||
D = getCanonicalDecl(D);
|
||||
// Position the iterator getStackSize() - 1 - Level entries past begin().
const_iterator StartI = std::next(begin(), getStackSize() - 1 - Level);
|
||||
return getDSA(StartI, D);
|
||||
}
|
||||
|
||||
const DSAStackTy::DSAVarData
|
||||
DSAStackTy::hasDSA(ValueDecl *D,
|
||||
const llvm::function_ref<bool(OpenMPClauseKind)> CPred,
|
||||
|
@ -2108,9 +2119,7 @@ VarDecl *Sema::isOpenMPCapturedDecl(ValueDecl *D, bool CheckScopeInfo,
|
|||
|
||||
void Sema::adjustOpenMPTargetScopeIndex(unsigned &FunctionScopesIndex,
|
||||
unsigned Level) const {
|
||||
SmallVector<OpenMPDirectiveKind, 4> Regions;
|
||||
getOpenMPCaptureRegions(Regions, DSAStack->getDirective(Level));
|
||||
FunctionScopesIndex -= Regions.size();
|
||||
FunctionScopesIndex -= getOpenMPCaptureLevels(DSAStack->getDirective(Level));
|
||||
}
|
||||
|
||||
void Sema::startOpenMPLoop() {
|
||||
|
@ -2213,6 +2222,29 @@ bool Sema::isOpenMPTargetCapturedDecl(const ValueDecl *D, unsigned Level,
|
|||
Regions[CaptureLevel] != OMPD_task;
|
||||
}
|
||||
|
||||
/// Decides whether the variable \p D must be captured for capture level
/// \p CaptureLevel of the OpenMP construct at \p Level. Requires OpenMP to be
/// enabled in the language options.
bool Sema::isOpenMPGlobalCapturedDecl(ValueDecl *D, unsigned Level,
|
||||
unsigned CaptureLevel) const {
|
||||
assert(LangOpts.OpenMP && "OpenMP is not allowed");
|
||||
// Only variables without local storage get the data-sharing based check
// below; every other declaration falls through to the final 'return true'.
|
||||
|
||||
if (const auto *VD = dyn_cast<VarDecl>(D)) {
|
||||
if (!VD->hasLocalStorage()) {
|
||||
DSAStackTy::DSAVarData TopDVar =
|
||||
DSAStack->getTopDSA(D, /*FromParent=*/false);
|
||||
unsigned NumLevels =
|
||||
getOpenMPCaptureLevels(DSAStack->getDirective(Level));
|
||||
// At Level 0: capture only at the last capture level of the directive and
// only when the top data-sharing attribute is not 'shared'.
if (Level == 0)
|
||||
return (NumLevels == CaptureLevel + 1) && TopDVar.CKind != OMPC_shared;
|
||||
// Otherwise: capture when the attribute found for Level - 1 is not
// 'shared', or when the same query for the last capture level of the
// directive at Level - 1 requires the capture.
DSAStackTy::DSAVarData DVar = DSAStack->getImplicitDSA(D, Level - 1);
|
||||
return DVar.CKind != OMPC_shared ||
|
||||
isOpenMPGlobalCapturedDecl(
|
||||
D, Level - 1,
|
||||
getOpenMPCaptureLevels(DSAStack->getDirective(Level - 1)) - 1);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void Sema::DestroyDataSharingAttributesStack() { delete DSAStack; }
|
||||
|
||||
void Sema::finalizeOpenMPDelayedAnalysis() {
|
||||
|
@ -3575,7 +3607,7 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
|
|||
};
|
||||
// Start a captured region for 'parallel'.
|
||||
ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
|
||||
ParamsParallel, /*OpenMPCaptureLevel=*/1);
|
||||
ParamsParallel, /*OpenMPCaptureLevel=*/0);
|
||||
QualType Args[] = {VoidPtrTy};
|
||||
FunctionProtoType::ExtProtoInfo EPI;
|
||||
EPI.Variadic = true;
|
||||
|
@ -3596,7 +3628,7 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
|
|||
std::make_pair(StringRef(), QualType()) // __context with shared vars
|
||||
};
|
||||
ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
|
||||
Params, /*OpenMPCaptureLevel=*/2);
|
||||
Params, /*OpenMPCaptureLevel=*/1);
|
||||
// Mark this captured region as inlined, because we don't use outlined
|
||||
// function directly.
|
||||
getCurCapturedRegion()->TheCapturedDecl->addAttr(
|
||||
|
|
|
@ -206,7 +206,7 @@ int main() {
|
|||
// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 4
|
||||
// CHECK: store [[S_DOUBLE_TY]]* %{{.+}}, [[S_DOUBLE_TY]]** [[VAR_REF]],
|
||||
// CHECK: [[SIVAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 5
|
||||
// CHECK: [[SIVAR_VAL:%.+]] = load i32, i32* %{{.+}},
|
||||
// CHECK: [[SIVAR_VAL:%.+]] = load i32, i32* @{{.+}},
|
||||
// CHECK: store i{{[0-9]+}} [[SIVAR_VAL]], i{{[0-9]+}}* [[SIVAR_REF]],
|
||||
|
||||
// Allocate task.
|
||||
|
|
|
@ -187,7 +187,7 @@ int main() {
|
|||
// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 3
|
||||
// CHECK: store [[S_DOUBLE_TY]]* [[VAR_ADDR:%.+]], [[S_DOUBLE_TY]]** [[VAR_REF]],
|
||||
// CHECK: [[SIVAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 4
|
||||
// CHECK: store i{{[0-9]+}}* [[SIVAR:%.+]], i{{[0-9]+}}** [[SIVAR_REF]],
|
||||
// CHECK: store i{{[0-9]+}}* [[SIVAR:@.+]], i{{[0-9]+}}** [[SIVAR_REF]],
|
||||
|
||||
// Allocate task.
|
||||
// Returns struct kmp_task_t {
|
||||
|
|
|
@ -206,7 +206,7 @@ int main() {
|
|||
// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 4
|
||||
// CHECK: store [[S_DOUBLE_TY]]* %{{.+}}, [[S_DOUBLE_TY]]** [[VAR_REF]],
|
||||
// CHECK: [[SIVAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 5
|
||||
// CHECK: [[SIVAR_VAL:%.+]] = load i32, i32* %{{.+}},
|
||||
// CHECK: [[SIVAR_VAL:%.+]] = load i32, i32* @{{.+}},
|
||||
// CHECK: store i{{[0-9]+}} [[SIVAR_VAL]], i{{[0-9]+}}* [[SIVAR_REF]],
|
||||
|
||||
// Allocate task.
|
||||
|
|
|
@ -187,7 +187,7 @@ int main() {
|
|||
// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 3
|
||||
// CHECK: store [[S_DOUBLE_TY]]* [[VAR_ADDR:%.+]], [[S_DOUBLE_TY]]** [[VAR_REF]],
|
||||
// CHECK: [[SIVAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 4
|
||||
// CHECK: store i{{[0-9]+}}* [[SIVAR:%.+]], i{{[0-9]+}}** [[SIVAR_REF]],
|
||||
// CHECK: store i{{[0-9]+}}* [[SIVAR:@.+]], i{{[0-9]+}}** [[SIVAR_REF]],
|
||||
|
||||
// Allocate task.
|
||||
// Returns struct kmp_task_t {
|
||||
|
|
Loading…
Reference in New Issue