[OPENMP, NVPTX] Improve globalization of the variables captured by value.

If a variable is captured by value and the corresponding parameter in
the outlined function escapes its declaration context, that parameter
must be globalized. To globalize it, we take the address of the
original parameter, load its value, store the value to the globalized
address, and then use the globalized address instead of the original.

The patch improves globalization for parallel and teams regions, as
well as for functions used in declare target regions.
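
For illustration (an example written for this summary, not code from the
patch): a scalar captured by value whose outlined-function parameter escapes
because it is bound to a reference:

  int foo(int &x) { return x; } // binds a reference, so the argument escapes

  void bar(int a) {
  #pragma omp target parallel firstprivate(a)
    {
      // 'a' reaches the outlined function by value; since foo() takes it by
      // reference, the parameter escapes its declaration context and must be
      // copied into globalized memory before the call.
      foo(a);
    }
  }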

llvm-svn: 327654
Author: Alexey Bataev
Date:   2018-03-15 18:10:54 +0000
parent 3dbc362fe2
commit c99042ba97
5 changed files with 269 additions and 90 deletions

lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp

@@ -171,16 +171,23 @@ class CheckVarsEscapingDeclContext final
: public ConstStmtVisitor<CheckVarsEscapingDeclContext> {
CodeGenFunction &CGF;
llvm::SetVector<const ValueDecl *> EscapedDecls;
llvm::SmallPtrSet<const Decl *, 4> EscapedParameters;
llvm::SmallPtrSet<const ValueDecl *, 4> IgnoredDecls;
bool AllEscaped = false;
RecordDecl *GlobalizedRD = nullptr;
llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;
void markAsEscaped(const ValueDecl *VD) {
if (IgnoredDecls.count(VD) ||
(CGF.CapturedStmtInfo &&
CGF.CapturedStmtInfo->lookup(cast<VarDecl>(VD))))
if (IgnoredDecls.count(VD))
return;
// Variables captured by value must be globalized.
if (auto *CSI = CGF.CapturedStmtInfo) {
if (const FieldDecl *FD = CGF.CapturedStmtInfo->lookup(cast<VarDecl>(VD))) {
if (FD->getType()->isReferenceType())
return;
EscapedParameters.insert(VD);
}
}
EscapedDecls.insert(VD);
}
@@ -385,12 +392,15 @@ public:
Visit(Child);
}
/// Returns the record that handles all the escaped local variables and is
/// used instead of their original storage.
const RecordDecl *getGlobalizedRecord() {
if (!GlobalizedRD)
buildRecordForGlobalizedVars();
return GlobalizedRD;
}
/// Returns the field in the globalized record for the escaped variable.
const FieldDecl *getFieldForGlobalizedVar(const ValueDecl *VD) const {
assert(GlobalizedRD &&
"Record for globalized variables must be generated already.");
@@ -400,9 +410,16 @@ public:
return I->getSecond();
}
/// Returns the list of the escaped local variables/parameters.
ArrayRef<const ValueDecl *> getEscapedDecls() const {
return EscapedDecls.getArrayRef();
}
/// Returns the set of escaped local variables that are actually parameters
/// passed by value.
const llvm::SmallPtrSetImpl<const Decl *> &getEscapedParameters() const {
return EscapedParameters;
}
};
} // anonymous namespace
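
To ground the terminology (an illustrative example, not from the patch): the
visitor records declarations whose address leaves the context that declared
them, since such storage may need to be visible to other threads:

  void take(int *p);

  void f() {
  #pragma omp parallel
    {
      int x = 0;
      take(&x); // the address of 'x' escapes -> 'x' must be globalized
      int y = 0;
      ++y;      // 'y' never escapes -> it remains a private stack slot
    }
  }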
@@ -614,58 +631,14 @@ void CGOpenMPRuntimeNVPTX::emitGenericEntryHeader(CodeGenFunction &CGF,
createNVPTXRuntimeFunction(
OMPRTL_NVPTX__kmpc_data_sharing_init_stack));
const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
if (I == FunctionGlobalizedDecls.end())
return;
const RecordDecl *GlobalizedVarsRecord = I->getSecond().first;
QualType RecTy = CGM.getContext().getRecordType(GlobalizedVarsRecord);
// Recover pointer to this function's global record. The runtime will
// handle the specifics of the allocation of the memory.
// Use actual memory size of the record including the padding
// for alignment purposes.
unsigned Alignment =
CGM.getContext().getTypeAlignInChars(RecTy).getQuantity();
unsigned GlobalRecordSize =
CGM.getContext().getTypeSizeInChars(RecTy).getQuantity();
GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment);
// TODO: allow the usage of shared memory to be controlled by
// the user; for now, default to global.
llvm::Value *GlobalRecordSizeArg[] = {
llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize),
CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_push_stack),
GlobalRecordSizeArg);
llvm::Value *GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
GlobalRecValue, CGF.ConvertTypeForMem(RecTy)->getPointerTo());
FunctionToGlobalRecPtr.try_emplace(CGF.CurFn, GlobalRecValue);
// Emit the "global alloca" which is a GEP from the global declaration record
// using the pointer returned by the runtime.
LValue Base = CGF.MakeNaturalAlignPointeeAddrLValue(GlobalRecCastAddr, RecTy);
auto &Res = I->getSecond().second;
for (auto &Rec : Res) {
const FieldDecl *FD = Rec.second.first;
LValue VarAddr = CGF.EmitLValueForField(Base, FD);
Rec.second.second = VarAddr.getAddress();
}
emitGenericVarsProlog(CGF, WST.Loc);
}
void CGOpenMPRuntimeNVPTX::emitGenericEntryFooter(CodeGenFunction &CGF,
EntryFunctionState &EST) {
const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
if (I != FunctionGlobalizedDecls.end()) {
if (!CGF.HaveInsertPoint())
return;
auto I = FunctionToGlobalRecPtr.find(CGF.CurFn);
if (I != FunctionToGlobalRecPtr.end()) {
llvm::Value *Args[] = {I->getSecond()};
CGF.EmitRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
Args);
}
}
emitGenericVarsEpilog(CGF);
if (!CGF.HaveInsertPoint())
return;
if (!EST.ExitBB)
EST.ExitBB = CGF.createBasicBlock(".exit");
@@ -1207,6 +1180,24 @@ void CGOpenMPRuntimeNVPTX::emitNumTeamsClause(CodeGenFunction &CGF,
llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
SourceLocation Loc = D.getLocStart();
// Emit target region as a standalone region.
class NVPTXPrePostActionTy : public PrePostActionTy {
SourceLocation &Loc;
public:
NVPTXPrePostActionTy(SourceLocation &Loc) : Loc(Loc) {}
void Enter(CodeGenFunction &CGF) override {
static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
.emitGenericVarsProlog(CGF, Loc);
}
void Exit(CodeGenFunction &CGF) override {
static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
.emitGenericVarsEpilog(CGF);
}
} Action(Loc);
CodeGen.setAction(Action);
auto *OutlinedFun =
cast<llvm::Function>(CGOpenMPRuntime::emitParallelOutlinedFunction(
D, ThreadIDVar, InnermostKind, CodeGen));
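
For orientation (a simplified excerpt of clang's existing hook in
CGOpenMPRuntime.h, reproduced here for readers of this diff): PrePostActionTy
lets the runtime wrap code around a region body. The NVPTX action above emits
the globalization prolog on Enter and the epilog on Exit, and the region
lambdas in CGStmtOpenMP.cpp (changed further below) now call
Action.Enter(CGF) explicitly after privatization, so the prolog runs once the
private copies exist:

  class CodeGenFunction; // from clang CodeGen

  class PrePostActionTy {
  public:
    explicit PrePostActionTy() {}
    virtual void Enter(CodeGenFunction &CGF) {}
    virtual void Exit(CodeGenFunction &CGF) {}
    virtual ~PrePostActionTy() {}
  };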
@@ -1222,7 +1213,24 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction(
llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
SourceLocation Loc = D.getLocStart();
// Emit target region as a standalone region.
class NVPTXPrePostActionTy : public PrePostActionTy {
SourceLocation &Loc;
public:
NVPTXPrePostActionTy(SourceLocation &Loc) : Loc(Loc) {}
void Enter(CodeGenFunction &CGF) override {
static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
.emitGenericVarsProlog(CGF, Loc);
}
void Exit(CodeGenFunction &CGF) override {
static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
.emitGenericVarsEpilog(CGF);
}
} Action(Loc);
CodeGen.setAction(Action);
llvm::Value *OutlinedFunVal = CGOpenMPRuntime::emitTeamsOutlinedFunction(
D, ThreadIDVar, InnermostKind, CodeGen);
llvm::Function *OutlinedFun = cast<llvm::Function>(OutlinedFunVal);
@@ -1233,6 +1241,73 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction(
return OutlinedFun;
}
void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
SourceLocation Loc) {
CGBuilderTy &Bld = CGF.Builder;
const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
if (I == FunctionGlobalizedDecls.end())
return;
const RecordDecl *GlobalizedVarsRecord = I->getSecond().GlobalRecord;
QualType RecTy = CGM.getContext().getRecordType(GlobalizedVarsRecord);
// Recover pointer to this function's global record. The runtime will
// handle the specifics of the allocation of the memory.
// Use actual memory size of the record including the padding
// for alignment purposes.
unsigned Alignment =
CGM.getContext().getTypeAlignInChars(RecTy).getQuantity();
unsigned GlobalRecordSize =
CGM.getContext().getTypeSizeInChars(RecTy).getQuantity();
GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment);
// TODO: allow the usage of shared memory to be controlled by
// the user; for now, default to global.
llvm::Value *GlobalRecordSizeArg[] = {
llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize),
CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_push_stack),
GlobalRecordSizeArg);
llvm::Value *GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
GlobalRecValue, CGF.ConvertTypeForMem(RecTy)->getPointerTo());
LValue Base = CGF.MakeNaturalAlignPointeeAddrLValue(GlobalRecCastAddr, RecTy);
I->getSecond().GlobalRecordAddr = GlobalRecValue;
// Emit the "global alloca" which is a GEP from the global declaration record
// using the pointer returned by the runtime.
for (auto &Rec : I->getSecond().LocalVarData) {
bool EscapedParam = I->getSecond().EscapedParameters.count(Rec.first);
llvm::Value *ParValue;
if (EscapedParam) {
const auto *VD = cast<VarDecl>(Rec.first);
LValue ParLVal =
CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
ParValue = CGF.EmitLoadOfScalar(ParLVal, Loc);
}
const FieldDecl *FD = Rec.second.first;
LValue VarAddr = CGF.EmitLValueForField(Base, FD);
Rec.second.second = VarAddr.getAddress();
if (EscapedParam) {
const auto *VD = cast<VarDecl>(Rec.first);
CGF.EmitStoreOfScalar(ParValue, VarAddr);
I->getSecond().MappedParams->setVarAddr(CGF, VD, VarAddr.getAddress());
}
}
I->getSecond().MappedParams->apply(CGF);
}
void CGOpenMPRuntimeNVPTX::emitGenericVarsEpilog(CodeGenFunction &CGF) {
const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
if (I != FunctionGlobalizedDecls.end() && I->getSecond().GlobalRecordAddr) {
I->getSecond().MappedParams->restore(CGF);
if (!CGF.HaveInsertPoint())
return;
CGF.EmitRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
I->getSecond().GlobalRecordAddr);
}
}
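
To make the emitted protocol concrete, here is a hand-written sketch of what
the generated device code amounts to (the struct name, its single field, and
the function names are invented for the example; this is not actual compiler
output, and the runtime signatures are inferred from the CHECK lines in the
tests below):

  #include <cstddef>
  #include <cstdint>

  extern "C" void *__kmpc_data_sharing_push_stack(size_t, int16_t);
  extern "C" void __kmpc_data_sharing_pop_stack(void *);

  // One field per escaped variable or by-value parameter, laid out by
  // buildRecordForGlobalizedVars().
  struct _globalized_locals_ty { int b; };

  void outlined_region(/* captured arguments */) {
    // Prolog: acquire globalized storage from the runtime-managed stack.
    void *rec = __kmpc_data_sharing_push_stack(
        sizeof(_globalized_locals_ty), /*UseSharedMemory=*/0);
    _globalized_locals_ty *g = (_globalized_locals_ty *)rec;
    // For an escaped by-value parameter: load the incoming value, store it
    // into the record, and remap every later use to the globalized address.
    // ... the region body then uses &g->b instead of a private stack slot ...
    // Epilog: release the storage.
    __kmpc_data_sharing_pop_stack(rec);
  }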
void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF,
const OMPExecutableDirective &D,
SourceLocation Loc,
@@ -1317,7 +1392,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall(
llvm::Value *PtrV = Bld.CreateBitCast(V, CGF.VoidPtrTy);
CGF.EmitStoreOfScalar(PtrV, Dst, /*Volatile=*/false,
Ctx.getPointerType(Ctx.VoidPtrTy));
Idx++;
++Idx;
}
}
@@ -2750,8 +2825,8 @@ void CGOpenMPRuntimeNVPTX::emitOutlinedFunctionCall(
continue;
}
llvm::Value *TargetArg = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
NativeArg, NativeArg->getType()->getPointerElementType()->getPointerTo(
/*AddrSpace=*/0));
NativeArg,
NativeArg->getType()->getPointerElementType()->getPointerTo());
TargetArgs.emplace_back(
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TargetArg, TargetType));
}
@@ -2788,7 +2863,7 @@ llvm::Function *CGOpenMPRuntimeNVPTX::createParallelDataSharingWrapper(
auto *Fn = llvm::Function::Create(
CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
OutlinedParallelFn->getName() + "_wrapper", &CGM.getModule());
CGM.SetInternalFunctionAttributes(/*D=*/GlobalDecl(), Fn, CGFI);
CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
@@ -2854,6 +2929,7 @@ void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF,
"Function is registered already.");
SmallVector<const ValueDecl *, 4> IgnoredDecls;
const Stmt *Body = nullptr;
bool NeedToDelayGlobalization = false;
if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
Body = FD->getBody();
} else if (const auto *BD = dyn_cast<BlockDecl>(D)) {
@@ -2861,6 +2937,7 @@ void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF,
} else if (const auto *CD = dyn_cast<CapturedDecl>(D)) {
Body = CD->getBody();
if (CGF.CapturedStmtInfo->getKind() == CR_OpenMP) {
NeedToDelayGlobalization = true;
if (const auto *CS = dyn_cast<CapturedStmt>(Body)) {
IgnoredDecls.reserve(CS->capture_size());
for (const auto &Capture : CS->captures())
@@ -2876,14 +2953,29 @@ void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF,
const RecordDecl *GlobalizedVarsRecord = VarChecker.getGlobalizedRecord();
if (!GlobalizedVarsRecord)
return;
auto &Res =
FunctionGlobalizedDecls
.try_emplace(CGF.CurFn, GlobalizedVarsRecord, DeclToAddrMapTy())
.first->getSecond()
.second;
auto I = FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first;
I->getSecond().MappedParams =
llvm::make_unique<CodeGenFunction::OMPMapVars>();
I->getSecond().GlobalRecord = GlobalizedVarsRecord;
I->getSecond().EscapedParameters.insert(
VarChecker.getEscapedParameters().begin(),
VarChecker.getEscapedParameters().end());
DeclToAddrMapTy &Data = I->getSecond().LocalVarData;
for (const ValueDecl *VD : VarChecker.getEscapedDecls()) {
const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD);
Res.insert(std::make_pair(VD, std::make_pair(FD, Address::invalid())));
Data.insert(std::make_pair(VD, std::make_pair(FD, Address::invalid())));
}
if (!NeedToDelayGlobalization) {
emitGenericVarsProlog(CGF, D->getLocStart());
struct GlobalizationScope final : EHScopeStack::Cleanup {
GlobalizationScope() = default;
void Emit(CodeGenFunction &CGF, Flags flags) override {
static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
.emitGenericVarsEpilog(CGF);
}
};
CGF.EHStack.pushCleanup<GlobalizationScope>(NormalAndEHCleanup);
}
}
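
Illustrative source for the non-delayed path (this mirrors foo()/bar() from
the new test added below): in a device function outside any OpenMP captured
statement, the prolog is emitted immediately and the epilog is scheduled
through the cleanup above:

  int foo(int &a) { return a; } // binds a reference: the argument escapes

  int bar() {
    int a;         // escapes into foo(), so it lives in the globalized record
    return foo(a); // push_stack on entry; pop_stack runs via the
                   // NormalAndEHCleanup pushed in emitFunctionProlog
  }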
@@ -2892,14 +2984,13 @@ Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF,
auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
if (I == FunctionGlobalizedDecls.end())
return Address::invalid();
auto VDI = I->getSecond().second.find(VD);
if (VDI == I->getSecond().second.end())
auto VDI = I->getSecond().LocalVarData.find(VD);
if (VDI == I->getSecond().LocalVarData.end())
return Address::invalid();
return VDI->second.second;
}
void CGOpenMPRuntimeNVPTX::functionFinished(CodeGenFunction &CGF) {
FunctionToGlobalRecPtr.erase(CGF.CurFn);
FunctionGlobalizedDecls.erase(CGF.CurFn);
CGOpenMPRuntime::functionFinished(CGF);
}

lib/CodeGen/CGOpenMPRuntimeNVPTX.h

@@ -61,6 +61,12 @@ private:
/// function.
void emitGenericEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST);
/// Helper for generic variables globalization prolog.
void emitGenericVarsProlog(CodeGenFunction &CGF, SourceLocation Loc);
/// Helper for generic variables globalization epilog.
void emitGenericVarsEpilog(CodeGenFunction &CGF);
/// \brief Helper for Spmd mode target directive's entry function.
void emitSpmdEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST,
const OMPExecutableDirective &D);
@@ -332,13 +338,18 @@ private:
/// The map of local variables to their addresses in the global memory.
using DeclToAddrMapTy = llvm::MapVector<const Decl *,
std::pair<const FieldDecl *, Address>>;
/// Set of the parameters passed by value that escape the OpenMP context.
using EscapedParamsTy = llvm::SmallPtrSet<const Decl *, 4>;
struct FunctionData {
DeclToAddrMapTy LocalVarData;
const RecordDecl *GlobalRecord = nullptr;
llvm::Value *GlobalRecordAddr = nullptr;
EscapedParamsTy EscapedParameters;
std::unique_ptr<CodeGenFunction::OMPMapVars> MappedParams;
};
/// Maps the function to the list of the globalized variables with their
/// addresses.
llvm::DenseMap<llvm::Function *,
std::pair<const RecordDecl *, DeclToAddrMapTy>>
FunctionGlobalizedDecls;
/// Map from function to global record pointer.
llvm::DenseMap<llvm::Function *, llvm::Value *> FunctionToGlobalRecPtr;
llvm::SmallDenseMap<llvm::Function *, FunctionData> FunctionGlobalizedDecls;
};
} // CodeGen namespace.

lib/CodeGen/CGStmtOpenMP.cpp

@@ -1262,7 +1262,7 @@ static void emitEmptyBoundParameters(CodeGenFunction &,
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
// Emit parallel region as a standalone region.
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
OMPPrivateScope PrivateScope(CGF);
bool Copyins = CGF.EmitOMPCopyinClause(S);
(void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
@@ -1277,6 +1277,7 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
CGF.EmitOMPPrivateClause(S, PrivateScope);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
Action.Enter(CGF);
CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
};
@@ -2100,7 +2101,8 @@ emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
const OMPLoopDirective &S,
CodeGenFunction::JumpDest LoopExit) {
auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
PrePostActionTy &) {
PrePostActionTy &Action) {
Action.Enter(CGF);
bool HasCancel = false;
if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
@@ -2682,7 +2684,8 @@ void CodeGenFunction::EmitOMPParallelForDirective(
const OMPParallelForDirective &S) {
// Emit directive as a combined directive that consists of two implicit
// directives: 'parallel' with 'for' directive.
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
Action.Enter(CGF);
OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
emitDispatchForLoopBounds);
@@ -2695,7 +2698,8 @@ void CodeGenFunction::EmitOMPParallelForSimdDirective(
const OMPParallelForSimdDirective &S) {
// Emit directive as a combined directive that consists of two implicit
// directives: 'parallel' with 'for' directive.
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
Action.Enter(CGF);
CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
emitDispatchForLoopBounds);
};
@@ -2707,7 +2711,8 @@ void CodeGenFunction::EmitOMPParallelSectionsDirective(
const OMPParallelSectionsDirective &S) {
// Emit directive as a combined directive that consists of two implicit
// directives: 'parallel' with 'sections' directive.
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
Action.Enter(CGF);
CGF.EmitSections(S);
};
emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
@@ -4033,12 +4038,13 @@ static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
// Emit teams region as a standalone region.
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
OMPPrivateScope PrivateScope(CGF);
(void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
CGF.EmitOMPPrivateClause(S, PrivateScope);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
Action.Enter(CGF);
CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
};
@@ -4099,10 +4105,11 @@ emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
// Emit teams region as a standalone region.
auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
PrePostActionTy &) {
PrePostActionTy &Action) {
CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
Action.Enter(CGF);
CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
CodeGenDistribute);
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
@@ -4144,10 +4151,11 @@ static void emitTargetTeamsDistributeSimdRegion(
// Emit teams region as a standalone region.
auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
PrePostActionTy &) {
PrePostActionTy &Action) {
CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
Action.Enter(CGF);
CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
CodeGenDistribute);
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
@@ -4188,10 +4196,11 @@ void CodeGenFunction::EmitOMPTeamsDistributeDirective(
// Emit teams region as a standalone region.
auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
PrePostActionTy &) {
PrePostActionTy &Action) {
OMPPrivateScope PrivateScope(CGF);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
Action.Enter(CGF);
CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
CodeGenDistribute);
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
@@ -4209,10 +4218,11 @@ void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
// Emit teams region as a standalone region.
auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
PrePostActionTy &) {
PrePostActionTy &Action) {
OMPPrivateScope PrivateScope(CGF);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
Action.Enter(CGF);
CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
CodeGenDistribute);
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
@@ -4231,10 +4241,11 @@ void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
// Emit teams region as a standalone region.
auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
PrePostActionTy &) {
PrePostActionTy &Action) {
OMPPrivateScope PrivateScope(CGF);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
Action.Enter(CGF);
CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
CodeGenDistribute);
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
@@ -4253,10 +4264,11 @@ void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
// Emit teams region as a standalone region.
auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
PrePostActionTy &) {
PrePostActionTy &Action) {
OMPPrivateScope PrivateScope(CGF);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
Action.Enter(CGF);
CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
@@ -4277,10 +4289,11 @@ static void emitTargetTeamsDistributeParallelForRegion(
// Emit teams region as a standalone region.
auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
PrePostActionTy &) {
PrePostActionTy &Action) {
CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
Action.Enter(CGF);
CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
@@ -4328,10 +4341,11 @@ static void emitTargetTeamsDistributeParallelForSimdRegion(
// Emit teams region as a standalone region.
auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
PrePostActionTy &) {
PrePostActionTy &Action) {
CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
Action.Enter(CGF);
CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
@@ -4599,12 +4613,13 @@ static void emitTargetParallelRegion(CodeGenFunction &CGF,
// Get the captured statement associated with the 'parallel' region.
auto *CS = S.getCapturedStmt(OMPD_parallel);
Action.Enter(CGF);
auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) {
auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
(void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
CGF.EmitOMPPrivateClause(S, PrivateScope);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
Action.Enter(CGF);
// TODO: Add support for clauses.
CGF.EmitStmt(CS->getCapturedStmt());
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
@@ -4643,7 +4658,8 @@ static void emitTargetParallelForRegion(CodeGenFunction &CGF,
Action.Enter(CGF);
// Emit directive as a combined directive that consists of two implicit
// directives: 'parallel' with 'for' directive.
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
Action.Enter(CGF);
CodeGenFunction::OMPCancelStackRAII CancelRegion(
CGF, OMPD_target_parallel_for, S.hasCancel());
CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
@@ -4683,7 +4699,8 @@ emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
Action.Enter(CGF);
// Emit directive as a combined directive that consists of two implicit
// directives: 'parallel' with 'for' directive.
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
Action.Enter(CGF);
CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
emitDispatchForLoopBounds);
};

test/OpenMP/declare_target_codegen_globalization.cpp (new file)

@@ -0,0 +1,44 @@
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
// expected-no-diagnostics
int foo(int &a) { return a; }
int bar() {
int a;
return foo(a);
}
// CHECK: define void @__omp_offloading_{{.*}}maini1{{.*}}_l[[@LINE+5]](i32* dereferenceable{{.*}})
// CHECK-NOT: @__kmpc_data_sharing_push_stack
int maini1() {
int a;
#pragma omp target parallel map(from:a)
{
int b;
a = foo(b) + bar();
}
return a;
}
// parallel region
// CHECK: define {{.*}}void @{{.*}}(i32* noalias {{.*}}, i32* noalias {{.*}}, i32* dereferenceable{{.*}})
// CHECK: [[RES:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i64 4, i16 0)
// CHECK: [[GLOBALS:%.+]] = bitcast i8* [[RES]] to [[GLOBAL_ST:%struct[.].*]]*
// CHECK: [[B_ADDR:%.+]] = getelementptr inbounds [[GLOBAL_ST]], [[GLOBAL_ST]]* [[GLOBALS]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
// CHECK: call {{.*}}[[FOO:@.*foo.*]](i32* dereferenceable{{.*}} [[B_ADDR]])
// CHECK: call {{.*}}[[BAR:@.*bar.*]]()
// CHECK: call void @__kmpc_data_sharing_pop_stack(i8* [[RES]])
// CHECK: ret void
// CHECK: define {{.*}}[[FOO]](i32* dereferenceable{{.*}})
// CHECK-NOT: @__kmpc_data_sharing_push_stack
// CHECK: define {{.*}}[[BAR]]()
// CHECK: [[RES:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i64 4, i16 0)
// CHECK: [[GLOBALS:%.+]] = bitcast i8* [[RES]] to [[GLOBAL_ST:%struct[.].*]]*
// CHECK: [[A_ADDR:%.+]] = getelementptr inbounds [[GLOBAL_ST]], [[GLOBAL_ST]]* [[GLOBALS]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
// CHECK: call {{.*}}[[FOO]](i32* dereferenceable{{.*}} [[A_ADDR]])
// CHECK: call void @__kmpc_data_sharing_pop_stack(i8* [[RES]])
// CHECK: ret i32

test/OpenMP/nvptx_teams_codegen.cpp

@@ -36,8 +36,12 @@ int main (int argc, char **argv) {
// CK1: store {{.+}} 0, {{.+}},
// CK1: store i{{[0-9]+}} [[ARGC]], i{{[0-9]+}}* [[ARGCADDR]],
// CK1-64: [[CONV:%.+]] = bitcast i{{[0-9]+}}* [[ARGCADDR]] to i{{[0-9]+}}*
// CK1-64: store i{{[0-9]+}}* [[CONV]], i{{[0-9]+}}** [[ARGCADDR_PTR]],
// CK1-32: store i{{[0-9]+}}* [[ARGCADDR]], i{{[0-9]+}}** [[ARGCADDR_PTR]],
// CK1: call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} 4, i16 0)
// CK1-64: [[ARG:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[CONV]]
// CK1-32: [[ARG:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[ARGCADDR]]
// CK1: [[ARGCADDR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
// CK1: store i{{[0-9]+}} [[ARG]], i{{[0-9]+}}* [[ARGCADDR]],
// CK1: store i{{[0-9]+}}* [[ARGCADDR]], i{{[0-9]+}}** [[ARGCADDR_PTR]],
// CK1: [[ARGCADDR_PTR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[ARGCADDR_PTR]],
// CK1: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[ARGCADDR_PTR_REF]],
// CK1-NOT: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(
@@ -49,6 +53,10 @@ int main (int argc, char **argv) {
// CK1: [[ARGCADDR_PTR:%.+]] = alloca i{{.+}}***,
// CK1: [[ARGCADDR:%.+]] = alloca i{{.+}}**,
// CK1: store i{{.+}}** [[ARGC]], i{{.+}}*** [[ARGCADDR]]
// CK1: call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} {{4|8}}, i16 0)
// CK1: [[ARG:%.+]] = load i{{[0-9]+}}**, i{{[0-9]+}}*** [[ARGCADDR]]
// CK1: [[ARGCADDR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
// CK1: store i{{[0-9]+}}** [[ARG]], i{{[0-9]+}}*** [[ARGCADDR]],
// CK1: store i8*** [[ARGCADDR]], i8**** [[ARGCADDR_PTR]],
// CK1: [[ARGCADDR_PTR_REF:%.+]] = load i{{.+}}**, i{{.+}}*** [[ARGCADDR_PTR]],
// CK1: store i{{[0-9]+}}** null, i{{[0-9]+}}*** [[ARGCADDR_PTR_REF]],
@@ -104,8 +112,12 @@ int main (int argc, char **argv) {
// CK2-64: [[ACONV:%.+]] = bitcast i64* [[AADDR]] to i32*
// CK2-64: [[BCONV:%.+]] = bitcast i64* [[BADDR]] to i32*
// CK2-64: [[CONV:%.+]] = bitcast i64* [[ARGCADDR]] to i32*
// CK2-64: store i{{[0-9]+}}* [[CONV]], i{{[0-9]+}}** [[ARGCADDR_PTR]],
// CK2-32: store i{{[0-9]+}}* [[ARGCADDR]], i{{[0-9]+}}** [[ARGCADDR_PTR]],
// CK2: call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} 4, i16 0)
// CK2-64: [[ARG:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[CONV]]
// CK2-32: [[ARG:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[ARGCADDR]]
// CK2: [[ARGCADDR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
// CK2: store i{{[0-9]+}} [[ARG]], i{{[0-9]+}}* [[ARGCADDR]],
// CK2: store i{{[0-9]+}}* [[ARGCADDR]], i{{[0-9]+}}** [[ARGCADDR_PTR]],
// CK2: [[ARGCADDR_PTR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[ARGCADDR_PTR]],
// CK2: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[ARGCADDR_PTR_REF]],
// CK2-NOT: {{.+}} = call i32 @__kmpc_push_num_teams(
@@ -121,6 +133,10 @@ int main (int argc, char **argv) {
// CK2: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[AADDR]],
// CK2: store i{{[0-9]+}} [[B_IN]], i{{[0-9]+}}* [[BADDR]],
// CK2: store i{{[0-9]+}}** [[ARGC]], i{{[0-9]+}}*** [[ARGCADDR]],
// CK2: call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} {{4|8}}, i16 0)
// CK2: [[ARG:%.+]] = load i{{[0-9]+}}**, i{{[0-9]+}}*** [[ARGCADDR]]
// CK2: [[ARGCADDR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
// CK2: store i{{[0-9]+}}** [[ARG]], i{{[0-9]+}}*** [[ARGCADDR]],
// CK2: store i{{[0-9]+}}*** [[ARGCADDR]], i{{[0-9]+}}**** [[ARGCADDR_PTR]],
// CK2: [[ARGCADDR_PTR_REF:%.+]] = load i{{[0-9]+}}***, i{{[0-9]+}}**** [[ARGCADDR_PTR]],
// CK2: store i{{[0-9]+}}** null, i{{[0-9]+}}*** [[ARGCADDR_PTR_REF]],