[OPENMP] Allow runtime insert its own code inside OpenMP regions.

Solution unifies interface of RegionCodeGenTy type to allow insert
runtime-specific code before/after main codegen action defined in
CGStmtOpenMP.cpp file. Runtime should not define its own RegionCodeGenTy
for general OpenMP directives, but must be allowed to insert its own
 (required) code to support target specific codegen.

llvm-svn: 264700
This commit is contained in:
Alexey Bataev 2016-03-29 05:34:15 +00:00
parent bb7ce3b850
commit 14fa1c6b60
10 changed files with 611 additions and 505 deletions

File diff suppressed because it is too large Load Diff

View File

@ -46,7 +46,44 @@ class Address;
class CodeGenFunction;
class CodeGenModule;
typedef llvm::function_ref<void(CodeGenFunction &)> RegionCodeGenTy;
/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class PrePostActionTy {
public:
explicit PrePostActionTy() {}
virtual void Enter(CodeGenFunction &CGF) {}
virtual void Exit(CodeGenFunction &CGF) {}
virtual ~PrePostActionTy() {}
};
/// Class provides a way to call simple version of codegen for OpenMP region, or
/// an advanced with possible pre|post-actions in codegen.
class RegionCodeGenTy final {
intptr_t CodeGen;
typedef void (*CodeGenTy)(intptr_t, CodeGenFunction &, PrePostActionTy &);
CodeGenTy Callback;
mutable PrePostActionTy *PrePostAction;
RegionCodeGenTy() = delete;
RegionCodeGenTy &operator=(const RegionCodeGenTy &) = delete;
template <typename Callable>
static void CallbackFn(intptr_t CodeGen, CodeGenFunction &CGF,
PrePostActionTy &Action) {
return (*reinterpret_cast<Callable *>(CodeGen))(CGF, Action);
}
public:
template <typename Callable>
RegionCodeGenTy(
Callable &&CodeGen,
typename std::enable_if<
!std::is_same<typename std::remove_reference<Callable>::type,
RegionCodeGenTy>::value>::type * = nullptr)
: CodeGen(reinterpret_cast<intptr_t>(&CodeGen)),
Callback(CallbackFn<typename std::remove_reference<Callable>::type>),
PrePostAction(nullptr) {}
void setAction(PrePostActionTy &Action) const { PrePostAction = &Action; }
void operator()(CodeGenFunction &CGF) const;
};
class CGOpenMPRuntime {
protected:
@ -82,14 +119,14 @@ private:
OpenMPDefaultLocMapTy OpenMPDefaultLocMap;
Address getOrCreateDefaultLocation(unsigned Flags);
llvm::StructType *IdentTy;
llvm::StructType *IdentTy = nullptr;
/// \brief Map for SourceLocation and OpenMP runtime library debug locations.
typedef llvm::DenseMap<unsigned, llvm::Value *> OpenMPDebugLocMapTy;
OpenMPDebugLocMapTy OpenMPDebugLocMap;
/// \brief The type for a microtask which gets passed to __kmpc_fork_call().
/// Original representation is:
/// typedef void (kmpc_micro)(kmp_int32 global_tid, kmp_int32 bound_tid,...);
llvm::FunctionType *Kmpc_MicroTy;
llvm::FunctionType *Kmpc_MicroTy = nullptr;
/// \brief Stores debug location and ThreadID for the function.
struct DebugLocThreadIdTy {
llvm::Value *DebugLoc;
@ -810,13 +847,15 @@ public:
/// \param OutlinedFn Outlined function value to be defined by this call.
/// \param OutlinedFnID Outlined function ID value to be defined by this call.
/// \param IsOffloadEntry True if the outlined function is an offload entry.
/// \param CodeGen Code generation sequence for the \a D directive.
/// An oulined function may not be an entry if, e.g. the if clause always
/// evaluates to false.
virtual void emitTargetOutlinedFunction(const OMPExecutableDirective &D,
StringRef ParentName,
llvm::Function *&OutlinedFn,
llvm::Constant *&OutlinedFnID,
bool IsOffloadEntry);
bool IsOffloadEntry,
const RegionCodeGenTy &CodeGen);
/// \brief Emit the target offloading code associated with \a D. The emitted
/// code attempts offloading the execution to the device, an the event of

View File

@ -305,28 +305,32 @@ void CGOpenMPRuntimeNVPTX::createOffloadEntry(llvm::Constant *ID,
void CGOpenMPRuntimeNVPTX::emitTargetOutlinedFunction(
const OMPExecutableDirective &D, StringRef ParentName,
llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
bool IsOffloadEntry) {
bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
if (!IsOffloadEntry) // Nothing to do.
return;
assert(!ParentName.empty() && "Invalid target region parent name!");
const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
EntryFunctionState EST;
WorkerFunctionState WST(CGM);
// Emit target region as a standalone region.
auto &&CodeGen = [&EST, &WST, &CS, &D, this](CodeGenFunction &CGF) {
CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
(void)CGF.EmitOMPFirstprivateClause(D, PrivateScope);
CGF.EmitOMPPrivateClause(D, PrivateScope);
(void)PrivateScope.Privatize();
class NVPTXPrePostActionTy : public PrePostActionTy {
CGOpenMPRuntimeNVPTX &RT;
CGOpenMPRuntimeNVPTX::EntryFunctionState &EST;
CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST;
emitEntryHeader(CGF, EST, WST);
CGF.EmitStmt(CS.getCapturedStmt());
emitEntryFooter(CGF, EST);
};
public:
NVPTXPrePostActionTy(CGOpenMPRuntimeNVPTX &RT,
CGOpenMPRuntimeNVPTX::EntryFunctionState &EST,
CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST)
: RT(RT), EST(EST), WST(WST) {}
void Enter(CodeGenFunction &CGF) override {
RT.emitEntryHeader(CGF, EST, WST);
}
void Exit(CodeGenFunction &CGF) override { RT.emitEntryFooter(CGF, EST); }
} Action(*this, EST, WST);
CodeGen.setAction(Action);
emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
IsOffloadEntry, CodeGen);

View File

@ -24,6 +24,34 @@ namespace clang {
namespace CodeGen {
class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntime {
public:
class EntryFunctionState {
public:
llvm::BasicBlock *ExitBB;
EntryFunctionState() : ExitBB(nullptr){};
};
class WorkerFunctionState {
public:
llvm::Function *WorkerFn;
const CGFunctionInfo *CGFI;
WorkerFunctionState(CodeGenModule &CGM);
private:
void createWorkerFunction(CodeGenModule &CGM);
};
/// \brief Helper for target entry function. Guide the master and worker
/// threads to their respective locations.
void emitEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST,
WorkerFunctionState &WST);
/// \brief Signal termination of OMP execution.
void emitEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST);
private:
//
// NVPTX calls.
//
@ -66,24 +94,6 @@ class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntime {
// Outlined function for the workers to execute.
llvm::GlobalVariable *WorkID;
class EntryFunctionState {
public:
llvm::BasicBlock *ExitBB;
EntryFunctionState() : ExitBB(nullptr){};
};
class WorkerFunctionState {
public:
llvm::Function *WorkerFn;
const CGFunctionInfo *CGFI;
WorkerFunctionState(CodeGenModule &CGM);
private:
void createWorkerFunction(CodeGenModule &CGM);
};
/// \brief Initialize master-worker control state.
void initializeEnvironment();
@ -93,14 +103,6 @@ class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntime {
/// \brief Helper for worker function. Emit body of worker loop.
void emitWorkerLoop(CodeGenFunction &CGF, WorkerFunctionState &WST);
/// \brief Helper for target entry function. Guide the master and worker
/// threads to their respective locations.
void emitEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST,
WorkerFunctionState &WST);
/// \brief Signal termination of OMP execution.
void emitEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST);
/// \brief Returns specified OpenMP runtime function for the current OpenMP
/// implementation. Specialized for the NVPTX device.
/// \param Function OpenMP runtime function.
@ -129,7 +131,8 @@ class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntime {
StringRef ParentName,
llvm::Function *&OutlinedFn,
llvm::Constant *&OutlinedFnID,
bool IsOffloadEntry) override;
bool IsOffloadEntry,
const RegionCodeGenTy &CodeGen) override;
public:
explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM);

View File

@ -26,8 +26,7 @@ using namespace CodeGen;
namespace {
/// Lexical scope for OpenMP executable constructs, that handles correct codegen
/// for captured expressions.
class OMPLexicalScope {
CodeGenFunction::LexicalScope Scope;
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
for (const auto *C : S.clauses()) {
if (auto *CPI = OMPClauseWithPreInit::get(C)) {
@ -48,10 +47,11 @@ class OMPLexicalScope {
public:
OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
: Scope(CGF, S.getSourceRange()) {
: CodeGenFunction::LexicalScope(CGF, S.getSourceRange()) {
emitPreInitStmt(CGF, S);
}
};
} // namespace
llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
@ -1097,8 +1097,6 @@ static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
OpenMPDirectiveKind InnermostKind,
const RegionCodeGenTy &CodeGen) {
auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
auto OutlinedFn = CGF.CGM.getOpenMPRuntime().
emitParallelOrTeamsOutlinedFunction(S,
*CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
@ -1110,7 +1108,7 @@ static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
CGF, NumThreads, NumThreadsClause->getLocStart());
}
if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
CGF.CGM.getOpenMPRuntime().emitProcBindClause(
CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
}
@ -1122,14 +1120,17 @@ static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
break;
}
}
OMPLexicalScope Scope(CGF, S);
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
CapturedVars, IfCond);
}
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
OMPLexicalScope Scope(*this, S);
// Emit parallel region as a standalone region.
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
OMPPrivateScope PrivateScope(CGF);
bool Copyins = CGF.EmitOMPCopyinClause(S);
(void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
@ -1465,7 +1466,7 @@ void CodeGenFunction::EmitOMPSimdFinal(
}
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
// if (PreCond) {
// for (IV in 0..LastIteration) BODY;
// <Final counter/linear vars updates>;
@ -1508,7 +1509,6 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
emitAlignedClause(CGF, S);
CGF.EmitOMPLinearClauseInit(S);
bool HasLastprivateClause;
{
OMPPrivateScope LoopScope(CGF);
emitPrivateLoopCounters(CGF, LoopScope, S.counters(),
@ -1516,7 +1516,7 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
emitPrivateLinearVars(CGF, S, LoopScope);
CGF.EmitOMPPrivateClause(S, LoopScope);
CGF.EmitOMPReductionClauseInit(S, LoopScope);
HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
(void)LoopScope.Privatize();
CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
S.getInc(),
@ -1526,9 +1526,8 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
},
[](CodeGenFunction &) {});
// Emit final copy of the lastprivate variables at the end of loops.
if (HasLastprivateClause) {
if (HasLastprivateClause)
CGF.EmitOMPLastprivateClauseFinal(S);
}
CGF.EmitOMPReductionClauseFinal(S);
emitPostUpdateForReductionClause(
CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
@ -1543,6 +1542,7 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
CGF.EmitBlock(ContBlock, true);
}
};
OMPLexicalScope Scope(*this, S);
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}
@ -1928,11 +1928,12 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
bool HasLastprivates = false;
auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
PrePostActionTy &) {
HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
};
{
OMPLexicalScope Scope(*this, S);
auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) {
HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
};
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
S.hasCancel());
}
@ -1945,11 +1946,12 @@ void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
bool HasLastprivates = false;
auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
PrePostActionTy &) {
HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
};
{
OMPLexicalScope Scope(*this, S);
auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) {
HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
};
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}
@ -1972,7 +1974,8 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
auto *CS = dyn_cast<CompoundStmt>(Stmt);
bool HasLastprivates = false;
auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF) {
auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF,
PrePostActionTy &) {
auto &C = CGF.CGM.getContext();
auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
// Emit helper vars inits.
@ -2112,10 +2115,10 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
}
void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
OMPLexicalScope Scope(*this, S);
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
};
OMPLexicalScope Scope(*this, S);
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
S.hasCancel());
}
@ -2137,17 +2140,17 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
AssignmentOps.append(C->assignment_ops().begin(),
C->assignment_ops().end());
}
// Emit code for 'single' region along with 'copyprivate' clauses
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
Action.Enter(CGF);
OMPPrivateScope SingleScope(CGF);
(void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
CGF.EmitOMPPrivateClause(S, SingleScope);
(void)SingleScope.Privatize();
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
};
{
OMPLexicalScope Scope(*this, S);
// Emit code for 'single' region along with 'copyprivate' clauses
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
CodeGenFunction::OMPPrivateScope SingleScope(CGF);
(void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
CGF.EmitOMPPrivateClause(S, SingleScope);
(void)SingleScope.Privatize();
CGF.EmitStmt(
cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
};
CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
CopyprivateVars, DestExprs,
SrcExprs, AssignmentOps);
@ -2162,21 +2165,23 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
}
void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
OMPLexicalScope Scope(*this, S);
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
Action.Enter(CGF);
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
};
OMPLexicalScope Scope(*this, S);
CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
}
void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
OMPLexicalScope Scope(*this, S);
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
Action.Enter(CGF);
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
};
Expr *Hint = nullptr;
if (auto *HintClause = S.getSingleClause<OMPHintClause>())
Hint = HintClause->getHint();
OMPLexicalScope Scope(*this, S);
CGM.getOpenMPRuntime().emitCriticalRegion(*this,
S.getDirectiveName().getAsString(),
CodeGen, S.getLocStart(), Hint);
@ -2186,8 +2191,7 @@ void CodeGenFunction::EmitOMPParallelForDirective(
const OMPParallelForDirective &S) {
// Emit directive as a combined directive that consists of two implicit
// directives: 'parallel' with 'for' directive.
OMPLexicalScope Scope(*this, S);
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
CGF.EmitOMPWorksharingLoop(S);
};
emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen);
@ -2197,8 +2201,7 @@ void CodeGenFunction::EmitOMPParallelForSimdDirective(
const OMPParallelForSimdDirective &S) {
// Emit directive as a combined directive that consists of two implicit
// directives: 'parallel' with 'for' directive.
OMPLexicalScope Scope(*this, S);
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
CGF.EmitOMPWorksharingLoop(S);
};
emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen);
@ -2208,14 +2211,14 @@ void CodeGenFunction::EmitOMPParallelSectionsDirective(
const OMPParallelSectionsDirective &S) {
// Emit directive as a combined directive that consists of two implicit
// directives: 'parallel' with 'sections' directive.
OMPLexicalScope Scope(*this, S);
auto &&CodeGen = [&S](CodeGenFunction &CGF) { CGF.EmitSections(S); };
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
CGF.EmitSections(S);
};
emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen);
}
void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
// Emit outlined function for task construct.
OMPLexicalScope Scope(*this, S);
auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
auto *I = CS->getCapturedDecl()->param_begin();
@ -2265,46 +2268,47 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
}
}
auto &&CodeGen = [PartId, &S, &PrivateVars, &FirstprivateVars](
CodeGenFunction &CGF) {
CodeGenFunction &CGF, PrePostActionTy &) {
// Set proper addresses for generated private copies.
auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
OMPPrivateScope Scope(CGF);
if (!PrivateVars.empty() || !FirstprivateVars.empty()) {
auto *CopyFn = CGF.Builder.CreateLoad(
CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
auto *PrivatesPtr = CGF.Builder.CreateLoad(
CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
// Map privates.
llvm::SmallVector<std::pair<const VarDecl *, Address>, 16>
PrivatePtrs;
llvm::SmallVector<llvm::Value *, 16> CallArgs;
CallArgs.push_back(PrivatesPtr);
for (auto *E : PrivateVars) {
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
Address PrivatePtr =
CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()));
PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
CallArgs.push_back(PrivatePtr.getPointer());
{
OMPPrivateScope Scope(CGF);
if (!PrivateVars.empty() || !FirstprivateVars.empty()) {
auto *CopyFn = CGF.Builder.CreateLoad(
CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
auto *PrivatesPtr = CGF.Builder.CreateLoad(
CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
// Map privates.
llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
llvm::SmallVector<llvm::Value *, 16> CallArgs;
CallArgs.push_back(PrivatesPtr);
for (auto *E : PrivateVars) {
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
Address PrivatePtr =
CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()));
PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
CallArgs.push_back(PrivatePtr.getPointer());
}
for (auto *E : FirstprivateVars) {
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
Address PrivatePtr =
CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()));
PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
CallArgs.push_back(PrivatePtr.getPointer());
}
CGF.EmitRuntimeCall(CopyFn, CallArgs);
for (auto &&Pair : PrivatePtrs) {
Address Replacement(CGF.Builder.CreateLoad(Pair.second),
CGF.getContext().getDeclAlign(Pair.first));
Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
}
}
for (auto *E : FirstprivateVars) {
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
Address PrivatePtr =
CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()));
PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
CallArgs.push_back(PrivatePtr.getPointer());
}
CGF.EmitRuntimeCall(CopyFn, CallArgs);
for (auto &&Pair : PrivatePtrs) {
Address Replacement(CGF.Builder.CreateLoad(Pair.second),
CGF.getContext().getDeclAlign(Pair.first));
Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
(void)Scope.Privatize();
if (*PartId) {
// TODO: emit code for untied tasks.
}
CGF.EmitStmt(CS->getCapturedStmt());
}
(void)Scope.Privatize();
if (*PartId) {
// TODO: emit code for untied tasks.
}
CGF.EmitStmt(CS->getCapturedStmt());
};
auto OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
S, *I, OMPD_task, CodeGen);
@ -2334,6 +2338,7 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
break;
}
}
OMPLexicalScope Scope(*this, S);
CGM.getOpenMPRuntime().emitTaskCall(
*this, S.getLocStart(), S, Tied, Final, OutlinedFn, SharedsTy,
CapturedStruct, IfCond, PrivateVars, PrivateCopies, FirstprivateVars,
@ -2355,10 +2360,11 @@ void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
void CodeGenFunction::EmitOMPTaskgroupDirective(
const OMPTaskgroupDirective &S) {
OMPLexicalScope Scope(*this, S);
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
Action.Enter(CGF);
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
};
OMPLexicalScope Scope(*this, S);
CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
}
@ -2490,10 +2496,10 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
void CodeGenFunction::EmitOMPDistributeDirective(
const OMPDistributeDirective &S) {
LexicalScope Scope(*this, S.getSourceRange());
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
CGF.EmitOMPDistributeLoop(S);
};
OMPLexicalScope Scope(*this, S);
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
false);
}
@ -2511,9 +2517,9 @@ static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
if (!S.getAssociatedStmt())
return;
OMPLexicalScope Scope(*this, S);
auto *C = S.getSingleClause<OMPSIMDClause>();
auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF) {
auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
PrePostActionTy &Action) {
if (C) {
auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
@ -2521,10 +2527,12 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars);
} else {
Action.Enter(CGF);
CGF.EmitStmt(
cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
}
};
OMPLexicalScope Scope(*this, S);
CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
}
@ -2970,18 +2978,39 @@ void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
}
}
OMPLexicalScope Scope(*this, S);
auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF) {
auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
PrePostActionTy &) {
CGF.EmitStopPoint(CS);
EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
S.getV(), S.getExpr(), S.getUpdateExpr(),
S.isXLHSInRHSPart(), S.getLocStart());
};
OMPLexicalScope Scope(*this, S);
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
}
std::pair<llvm::Function * /*OutlinedFn*/, llvm::Constant * /*OutlinedFnID*/>
CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction(
CodeGenModule &CGM, const OMPTargetDirective &S, StringRef ParentName,
bool IsOffloadEntry) {
llvm::Function *OutlinedFn = nullptr;
llvm::Constant *OutlinedFnID = nullptr;
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
OMPPrivateScope PrivateScope(CGF);
(void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
CGF.EmitOMPPrivateClause(S, PrivateScope);
(void)PrivateScope.Privatize();
Action.Enter(CGF);
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
};
// Emit target region as a standalone region.
CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
S, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen);
return std::make_pair(OutlinedFn, OutlinedFnID);
}
void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
OMPLexicalScope Scope(*this, S);
const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
@ -3027,9 +3056,9 @@ void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
ParentName =
CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CurFuncDecl)));
CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
IsOffloadEntry);
std::tie(Fn, FnID) = EmitOMPTargetDirectiveOutlinedFunction(
CGM, S, ParentName, IsOffloadEntry);
OMPLexicalScope Scope(*this, S);
CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, FnID, IfCond, Device,
CapturedVars);
}
@ -3039,8 +3068,6 @@ static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
OpenMPDirectiveKind InnermostKind,
const RegionCodeGenTy &CodeGen) {
auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
auto OutlinedFn = CGF.CGM.getOpenMPRuntime().
emitParallelOrTeamsOutlinedFunction(S,
*CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
@ -3063,14 +3090,16 @@ static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
ThreadLimitVal, S.getLocStart());
}
OMPLexicalScope Scope(CGF, S);
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
CapturedVars);
}
void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
LexicalScope Scope(*this, S.getSourceRange());
// Emit parallel region as a standalone region.
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
OMPPrivateScope PrivateScope(CGF);
(void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
CGF.EmitOMPPrivateClause(S, PrivateScope);
@ -3112,10 +3141,12 @@ CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
void CodeGenFunction::EmitOMPTargetDataDirective(
const OMPTargetDataDirective &S) {
// emit the code inside the construct for now
auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
OMPLexicalScope Scope(*this, S);
CGM.getOpenMPRuntime().emitInlinedDirective(
*this, OMPD_target_data,
[&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); });
*this, OMPD_target_data, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
CGF.EmitStmt(
cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
});
}
void CodeGenFunction::EmitOMPTargetEnterDataDirective(
@ -3140,18 +3171,22 @@ void CodeGenFunction::EmitOMPTargetParallelForDirective(
void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
// emit the code inside the construct for now
auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
OMPLexicalScope Scope(*this, S);
CGM.getOpenMPRuntime().emitInlinedDirective(
*this, OMPD_taskloop,
[&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); });
*this, OMPD_taskloop, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
CGF.EmitStmt(
cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
});
}
void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
const OMPTaskLoopSimdDirective &S) {
// emit the code inside the construct for now
auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
OMPLexicalScope Scope(*this, S);
CGM.getOpenMPRuntime().emitInlinedDirective(
*this, OMPD_taskloop_simd,
[&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); });
*this, OMPD_taskloop_simd, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
CGF.EmitStmt(
cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
});
}

View File

@ -2366,6 +2366,13 @@ public:
void EmitOMPDistributeDirective(const OMPDistributeDirective &S);
void EmitOMPDistributeLoop(const OMPDistributeDirective &S);
/// Emit outlined function for the target directive.
static std::pair<llvm::Function * /*OutlinedFn*/,
llvm::Constant * /*OutlinedFnID*/>
EmitOMPTargetDirectiveOutlinedFunction(CodeGenModule &CGM,
const OMPTargetDirective &S,
StringRef ParentName,
bool IsOffloadEntry);
/// \brief Emit inner loop of the worksharing/simd construct.
///
/// \param S Directive, for which the inner loop must be emitted.

View File

@ -39,7 +39,11 @@ int main() {
#pragma omp critical(the_name1) hint(23)
foo();
// CHECK: call {{.*}}void @__kmpc_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK]])
// CHECK: br label
// CHECK-NOT: call {{.*}}void @__kmpc_end_critical(
// CHECK: br label
// CHECK-NOT: call {{.*}}void @__kmpc_end_critical(
// CHECK: br label
if (a)
#pragma omp critical(the_name)
while (1)

View File

@ -87,10 +87,6 @@ int main() {
// TLS-LAMBDA: [[G_CPY_VAL:%.+]] = call{{( cxx_fast_tlscc)?}} i{{[0-9]+}}* [[G_CTOR:@.+]]()
// TLS-LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i32* [[G_CPY_VAL]])
// TLS-LAMBDA: define {{.*}}i{{[0-9]+}}* [[G_CTOR]]()
// TLS-LAMBDA: ret i{{[0-9]+}}* [[G]]
// TLS-LAMBDA: }
#pragma omp parallel copyin(g)
{
// LAMBDA: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* noalias %{{.+}}, i32* noalias %{{.+}})
@ -122,6 +118,11 @@ int main() {
g = 1;
// LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}*
// TLS-LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}*
// TLS-LAMBDA: define {{.*}}i{{[0-9]+}}* [[G_CTOR]]()
// TLS-LAMBDA: ret i{{[0-9]+}}* [[G]]
// TLS-LAMBDA: }
[&]() {
// LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
// LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
@ -149,9 +150,6 @@ int main() {
// TLS-BLOCKS: [[G_CPY_VAL:%.+]] = call{{( cxx_fast_tlscc)?}} i{{[0-9]+}}* [[G_CTOR:@.+]]()
// TLS-BLOCKS: call {{.*}}void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i32* [[G_CPY_VAL]])
// TLS-BLOCKS: define {{.*}}i{{[0-9]+}}* [[G_CTOR]]()
// TLS-BLOCKS: ret i{{[0-9]+}}* [[G]]
// TLS-BLOCKS: }
#pragma omp parallel copyin(g)
{
// BLOCKS: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* noalias %{{.+}}, i32* noalias %{{.+}})
@ -189,6 +187,10 @@ int main() {
// TLS-BLOCKS: store volatile i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_CAPTURE_DST]]
// TLS-BLOCKS-NOT: [[G]]{{[[^:word:]]}}
// TLS-BLOCKS: call {{.*}}void {{%.+}}(i8
// TLS-BLOCKS: define {{.*}}i{{[0-9]+}}* [[G_CTOR]]()
// TLS-BLOCKS: ret i{{[0-9]+}}* [[G]]
// TLS-BLOCKS: }
^{
// BLOCKS: define {{.+}} void {{@.+}}(i8*
// TLS-BLOCKS: define {{.+}} void {{@.+}}(i8*

View File

@ -111,8 +111,8 @@ int main() {
// CHECK-NEXT: invoke void [[FOO]]()
// CHECK: to label {{%?}}[[CONT:.+]] unwind
// CHECK: [[CONT]]
// CHECK: store i32 1, i32* [[DID_IT]]
// CHECK: call void @__kmpc_end_single([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
// CHECK: store i32 1, i32* [[DID_IT]]
// CHECK-NEXT: br label {{%?}}[[EXIT]]
// CHECK: [[EXIT]]
// CHECK: [[A_PTR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[COPY_LIST]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
@ -255,8 +255,8 @@ void array_func(int n, int a[n], St s[2]) {
// CHECK-LABEL: invoke void @_ZZN2SSC1ERiENKUlvE_clEv(
// CHECK-SAME: [[CAP_TY]]* [[CAP]])
// CHECK: store i32 1, i32* [[DID_IT]],
// CHECK: call void @__kmpc_end_single([[IDENT_T_TY]]* @{{.+}}, i32 %{{.+}})
// CHECK: store i32 1, i32* [[DID_IT]],
// CHECK: br label
// CHECK: call void @__kmpc_end_single(%{{.+}}* @{{.+}}, i32 %{{.+}})
@ -334,8 +334,8 @@ void array_func(int n, int a[n], St s[2]) {
// CHECK-NEXT: load i32, i32* %
// CHECK-NEXT: sdiv i32 %{{.+}}, 1
// CHECK-NEXT: store i32 %
// CHECK-NEXT: store i32 1, i32* [[DID_IT]],
// CHECK-NEXT: call void @__kmpc_end_single([[IDENT_T_TY]]* @{{.+}}, i32 %{{.+}})
// CHECK-NEXT: store i32 1, i32* [[DID_IT]],
// CHECK-NEXT: br label
// CHECK: getelementptr inbounds [3 x i8*], [3 x i8*]* [[LIST:%.+]], i64 0, i64 0
@ -376,8 +376,8 @@ void array_func(int n, int a[n], St s[2]) {
// CHECK-NEXT: store double* %
// CHECK-LABEL: invoke void @_ZZN3SSTIdEC1EvENKUlvE_clEv(
// CHECK: store i32 1, i32* [[DID_IT]],
// CHECK-NEXT: call void @__kmpc_end_single([[IDENT_T_TY]]* @{{.+}}, i32 %{{.+}})
// CHECK: call void @__kmpc_end_single([[IDENT_T_TY]]* @{{.+}}, i32 %{{.+}})
// CHECK-NEXT: store i32 1, i32* [[DID_IT]],
// CHECK-NEXT: br label
// CHECK: call void @__kmpc_end_single([[IDENT_T_TY]]* @{{.+}}, i32 %{{.+}})

View File

@ -32,6 +32,7 @@ int main() {
foo();
// CHECK-NOT: call {{.*}}void @__kmpc_taskgroup
// CHECK-NOT: call {{.*}}void @__kmpc_end_taskgroup
// CHECK: ret
return a;
}