Code reformatting and improvement for OpenMP.

Moved CGOpenMPRegionInfo from CGOpenMPRuntime.h to CGOpenMPRuntime.cpp file and reworked the code for this change. Also added processing of ThreadID variable passed as an argument in outlined functions in parallel and task directives.

llvm-svn: 219490
This commit is contained in:
Alexey Bataev 2014-10-10 12:19:54 +00:00
parent 1a8093c3a7
commit 1809571c76
5 changed files with 148 additions and 108 deletions

View File

@ -13,6 +13,7 @@
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/Decl.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/DerivedTypes.h"
@ -24,6 +25,49 @@
using namespace clang;
using namespace CodeGen;
/// \brief API for captured statement code generation in OpenMP constructs.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
CGOpenMPRegionInfo(const OMPExecutableDirective &D, const CapturedStmt &CS,
const VarDecl *ThreadIDVar)
: CGCapturedStmtInfo(CS, CR_OpenMP), ThreadIDVar(ThreadIDVar),
Directive(D) {
assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
}
virtual ~CGOpenMPRegionInfo() override{};
/// \brief Gets a variable or parameter for storing global thread id
/// inside OpenMP construct.
const VarDecl *getThreadIDVariable() const { return ThreadIDVar; }
/// \brief Gets an LValue for the current ThreadID variable.
LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
static bool classof(const CGCapturedStmtInfo *Info) {
return Info->getKind() == CR_OpenMP;
}
/// \brief Emit the captured statement body.
virtual void EmitBody(CodeGenFunction &CGF, Stmt *S) override;
/// \brief Get the name of the capture helper.
virtual StringRef getHelperName() const override { return ".omp_outlined."; }
private:
/// \brief A variable or parameter storing global thread id for OpenMP
/// constructs.
const VarDecl *ThreadIDVar;
/// \brief OpenMP executable directive associated with the region.
const OMPExecutableDirective &Directive;
};
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
return CGF.MakeNaturalAlignAddrLValue(
CGF.GetAddrOfLocalVar(ThreadIDVar),
CGF.getContext().getPointerType(ThreadIDVar->getType()));
}
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, Stmt *S) {
CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
CGF.EmitOMPFirstprivateClause(Directive, PrivateScope);
@ -51,6 +95,16 @@ CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
}
llvm::Value *
CGOpenMPRuntime::EmitOpenMPOutlinedFunction(const OMPExecutableDirective &D,
const VarDecl *ThreadIDVar) {
const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
CodeGenFunction CGF(CGM, true);
CGOpenMPRegionInfo CGInfo(D, *CS, ThreadIDVar);
CGF.CapturedStmtInfo = &CGInfo;
return CGF.GenerateCapturedStmtFunction(*CS);
}
llvm::Value *
CGOpenMPRuntime::GetOrCreateDefaultOpenMPLocation(OpenMPLocationFlags Flags) {
llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
@ -92,14 +146,15 @@ llvm::Value *CGOpenMPRuntime::EmitOpenMPUpdateLocation(
assert(CGF.CurFn && "No function in current CodeGenFunction.");
llvm::Value *LocValue = nullptr;
OpenMPLocMapTy::iterator I = OpenMPLocMap.find(CGF.CurFn);
if (I != OpenMPLocMap.end()) {
LocValue = I->second;
OpenMPLocThreadIDMapTy::iterator I = OpenMPLocThreadIDMap.find(CGF.CurFn);
if (I != OpenMPLocThreadIDMap.end()) {
LocValue = I->second.DebugLoc;
} else {
// Generate "ident_t .kmpc_loc.addr;"
llvm::AllocaInst *AI = CGF.CreateTempAlloca(IdentTy, ".kmpc_loc.addr");
AI->setAlignment(CGM.getDataLayout().getPrefTypeAlignment(IdentTy));
OpenMPLocMap[CGF.CurFn] = AI;
auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
Elem.second.DebugLoc = AI;
LocValue = AI;
CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
@ -139,46 +194,46 @@ llvm::Value *CGOpenMPRuntime::GetOpenMPThreadID(CodeGenFunction &CGF,
assert(CGF.CurFn && "No function in current CodeGenFunction.");
llvm::Value *ThreadID = nullptr;
OpenMPThreadIDMapTy::iterator I = OpenMPThreadIDMap.find(CGF.CurFn);
if (I != OpenMPThreadIDMap.end()) {
ThreadID = I->second;
} else {
// Check if current function is a function which has first parameter
// with type int32 and name ".global_tid.".
if (!CGF.CurFn->arg_empty() &&
CGF.CurFn->arg_begin()->getType()->isPointerTy() &&
CGF.CurFn->arg_begin()
->getType()
->getPointerElementType()
->isIntegerTy() &&
CGF.CurFn->arg_begin()
->getType()
->getPointerElementType()
->getIntegerBitWidth() == 32 &&
CGF.CurFn->arg_begin()->hasName() &&
CGF.CurFn->arg_begin()->getName() == ".global_tid.") {
CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
ThreadID = CGF.Builder.CreateLoad(CGF.CurFn->arg_begin());
} else {
// Generate "int32 .kmpc_global_thread_num.addr;"
CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc)};
ThreadID = CGF.EmitRuntimeCall(
CreateRuntimeFunction(OMPRTL__kmpc_global_thread_num), Args);
// Check whether we've already cached a load of the thread id in this
// function.
OpenMPLocThreadIDMapTy::iterator I = OpenMPLocThreadIDMap.find(CGF.CurFn);
if (I != OpenMPLocThreadIDMap.end()) {
ThreadID = I->second.ThreadID;
} else if (auto OMPRegionInfo =
dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
// Check if this an outlined function with thread id passed as argument.
auto ThreadIDVar = OMPRegionInfo->getThreadIDVariable();
auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
auto RVal = CGF.EmitLoadOfLValue(LVal, Loc);
LVal = CGF.MakeNaturalAlignAddrLValue(RVal.getScalarVal(),
ThreadIDVar->getType());
ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
// If value loaded in entry block, cache it and use it everywhere in
// function.
if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
Elem.second.ThreadID = ThreadID;
}
OpenMPThreadIDMap[CGF.CurFn] = ThreadID;
} else {
// This is not an outlined function region - need to call __kmpc_int32
// kmpc_global_thread_num(ident_t *loc).
// Generate thread id value and cache this value for use across the
// function.
CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc)};
ThreadID = CGF.EmitRuntimeCall(
CreateRuntimeFunction(OMPRTL__kmpc_global_thread_num), Args);
auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
Elem.second.ThreadID = ThreadID;
}
return ThreadID;
}
void CGOpenMPRuntime::FunctionFinished(CodeGenFunction &CGF) {
assert(CGF.CurFn && "No function in current CodeGenFunction.");
if (OpenMPThreadIDMap.count(CGF.CurFn))
OpenMPThreadIDMap.erase(CGF.CurFn);
if (OpenMPLocMap.count(CGF.CurFn))
OpenMPLocMap.erase(CGF.CurFn);
if (OpenMPLocThreadIDMap.count(CGF.CurFn))
OpenMPLocThreadIDMap.erase(CGF.CurFn);
}
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {

View File

@ -14,49 +14,29 @@
#ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H
#define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H
#include "CodeGenFunction.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/Type.h"
#include "clang/Basic/SourceLocation.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
namespace llvm {
class ArrayType;
class Constant;
class Function;
class FunctionType;
class StructType;
class Type;
class Value;
} // namespace llvm
namespace clang {
class OMPExecutableDirective;
class VarDecl;
namespace CodeGen {
/// \brief API for captured statement code generation in OpenMP constructs.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
CGOpenMPRegionInfo(const OMPExecutableDirective &D, const CapturedStmt &S,
const VarDecl *ThreadIDVar)
: CGCapturedStmtInfo(S, CR_OpenMP), ThreadIDVar(ThreadIDVar),
Directive(D) {}
virtual ~CGOpenMPRegionInfo() override{};
/// \brief Gets a variable or parameter for storing global thread id
/// inside OpenMP construct.
const VarDecl *getThreadIDVariable() const { return ThreadIDVar; }
static bool classof(const CGCapturedStmtInfo *Info) {
return Info->getKind() == CR_OpenMP;
}
/// \brief Emit the captured statement body.
virtual void EmitBody(CodeGenFunction &CGF, Stmt *S) override;
/// \brief Get the name of the capture helper.
virtual StringRef getHelperName() const override { return ".omp_outlined."; }
private:
/// \brief A variable or parameter storing global thread id for OpenMP
/// constructs.
const VarDecl *ThreadIDVar;
/// \brief OpenMP executable directive associated with the region.
const OMPExecutableDirective &Directive;
};
class CodeGenFunction;
class CodeGenModule;
class CGOpenMPRuntime {
public:
@ -102,7 +82,7 @@ private:
/// \brief Default const ident_t object used for initialization of all other
/// ident_t objects.
llvm::Constant *DefaultOpenMPPSource;
/// \brief Map of flags and corrsponding default locations.
/// \brief Map of flags and corresponding default locations.
typedef llvm::DenseMap<unsigned, llvm::Value *> OpenMPDefaultLocMapTy;
OpenMPDefaultLocMapTy OpenMPDefaultLocMap;
llvm::Value *GetOrCreateDefaultOpenMPLocation(OpenMPLocationFlags Flags);
@ -147,19 +127,22 @@ private:
IdentField_PSource
};
llvm::StructType *IdentTy;
/// \brief Map for Sourcelocation and OpenMP runtime library debug locations.
/// \brief Map for SourceLocation and OpenMP runtime library debug locations.
typedef llvm::DenseMap<unsigned, llvm::Value *> OpenMPDebugLocMapTy;
OpenMPDebugLocMapTy OpenMPDebugLocMap;
/// \brief The type for a microtask which gets passed to __kmpc_fork_call().
/// Original representation is:
/// typedef void (kmpc_micro)(kmp_int32 global_tid, kmp_int32 bound_tid,...);
llvm::FunctionType *Kmpc_MicroTy;
/// \brief Map of local debug location and functions.
typedef llvm::DenseMap<llvm::Function *, llvm::Value *> OpenMPLocMapTy;
OpenMPLocMapTy OpenMPLocMap;
/// \brief Map of local ThreadID and functions.
typedef llvm::DenseMap<llvm::Function *, llvm::Value *> OpenMPThreadIDMapTy;
OpenMPThreadIDMapTy OpenMPThreadIDMap;
/// \brief Stores debug location and ThreadID for the function.
struct DebugLocThreadIdTy {
llvm::Value *DebugLoc;
llvm::Value *ThreadID;
};
/// \brief Map of local debug location, ThreadId and functions.
typedef llvm::DenseMap<llvm::Function *, DebugLocThreadIdTy>
OpenMPLocThreadIDMapTy;
OpenMPLocThreadIDMapTy OpenMPLocThreadIDMap;
/// \brief Type kmp_critical_name, originally defined as typedef kmp_int32
/// kmp_critical_name[8];
llvm::ArrayType *KmpCriticalNameTy;
@ -167,8 +150,6 @@ private:
llvm::StringMap<llvm::Value *, llvm::BumpPtrAllocator> CriticalRegionVarNames;
/// \brief Emits object of ident_t type with info for source location.
/// \param CGF Reference to current CodeGenFunction.
/// \param Loc Clang source location.
/// \param Flags Flags for OpenMP location.
///
llvm::Value *
@ -187,8 +168,6 @@ private:
llvm::Constant *CreateRuntimeFunction(OpenMPRTLFunction Function);
/// \brief Gets thread id value for the current thread.
/// \param CGF Reference to current CodeGenFunction.
/// \param Loc Clang source location.
///
llvm::Value *GetOpenMPThreadID(CodeGenFunction &CGF, SourceLocation Loc);
@ -196,16 +175,25 @@ public:
explicit CGOpenMPRuntime(CodeGenModule &CGM);
virtual ~CGOpenMPRuntime() {}
/// \brief Emits outlined function for the specified OpenMP directive \a D
/// (required for parallel and task directives). This outlined function has
/// type void(*)(kmp_int32 /*ThreadID*/, kmp_int32 /*BoundID*/, struct
/// context_vars*).
/// \param D OpenMP directive.
/// \param ThreadIDVar Variable for thread id in the current OpenMP region.
///
virtual llvm::Value *
EmitOpenMPOutlinedFunction(const OMPExecutableDirective &D,
const VarDecl *ThreadIDVar);
/// \brief Cleans up references to the objects in finished function.
/// \param CGF Reference to finished CodeGenFunction.
///
void FunctionFinished(CodeGenFunction &CGF);
/// \brief Emits code for parallel call of the \a OutlinedFn with variables
/// captured in a record which address is stored in \a CapturedStruct.
/// \param CGF Reference to current CodeGenFunction.
/// \param Loc Clang source location.
/// \param OutlinedFn Outlined function to be run in parallel threads.
/// \param OutlinedFn Outlined function to be run in parallel threads. Type of
/// this function is void(*)(kmp_int32, kmp_int32, struct context_vars*).
/// \param CapturedStruct A pointer to the record with the references to
/// variables used in \a OutlinedFn function.
///
@ -216,14 +204,14 @@ public:
/// \brief Returns corresponding lock object for the specified critical region
/// name. If the lock object does not exist it is created, otherwise the
/// reference to the existing copy is returned.
/// \param CriticalName Name of the critical region.
///
llvm::Value *GetCriticalRegionLock(StringRef CriticalName);
/// \brief Emits start of the critical region by calling void
/// __kmpc_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name
/// * \a RegionLock)
/// \param CGF Reference to current CodeGenFunction.
/// \param RegionLock The lock object for critical region.
/// \param Loc Location of the construct.
virtual void EmitOMPCriticalRegionStart(CodeGenFunction &CGF,
llvm::Value *RegionLock,
SourceLocation Loc);
@ -231,16 +219,12 @@ public:
/// \brief Emits end of the critical region by calling void
/// __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name
/// * \a RegionLock)
/// \param CGF Reference to current CodeGenFunction.
/// \param RegionLock The lock object for critical region.
/// \param Loc Location of the construct.
virtual void EmitOMPCriticalRegionEnd(CodeGenFunction &CGF,
llvm::Value *RegionLock,
SourceLocation Loc);
/// \brief Emits a barrier for OpenMP threads.
/// \param CGF Reference to current CodeGenFunction.
/// \param Loc Clang source location.
/// \param Flags Flags for the barrier.
///
virtual void EmitOMPBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,

View File

@ -138,17 +138,10 @@ void CodeGenFunction::EmitOMPFirstprivateClause(
}
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
const CapturedStmt *CS = cast<CapturedStmt>(S.getAssociatedStmt());
llvm::Value *CapturedStruct = GenerateCapturedStmtArgument(*CS);
llvm::Value *OutlinedFn;
{
CodeGenFunction CGF(CGM, true);
CGOpenMPRegionInfo CGInfo(S, *CS, *CS->getCapturedDecl()->param_begin());
CGF.CapturedStmtInfo = &CGInfo;
OutlinedFn = CGF.GenerateCapturedStmtFunction(*CS);
}
auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
auto OutlinedFn = CGM.getOpenMPRuntime().EmitOpenMPOutlinedFunction(
S, *CS->getCapturedDecl()->param_begin());
CGM.getOpenMPRuntime().EmitOMPParallelCall(*this, S.getLocStart(), OutlinedFn,
CapturedStruct);
}

View File

@ -215,6 +215,10 @@ public:
bool isCXXThisExprCaptured() const { return CXXThisFieldDecl != nullptr; }
FieldDecl *getThisFieldDecl() const { return CXXThisFieldDecl; }
static bool classof(const CGCapturedStmtInfo *) {
return true;
}
/// \brief Emit the captured statement body.
virtual void EmitBody(CodeGenFunction &CGF, Stmt *S) {
RegionCounter Cnt = CGF.getPGORegionCounter(S);

View File

@ -74,7 +74,7 @@ int main() {
// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}* [[GTID_ADDR]]
// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
// CHECK: [[T_VAR_PTR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 1
// CHECK: [[T_VAR_REF:%.+]] = load i{{[0-9]+}}** [[T_VAR_PTR_REF]],
// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}* [[T_VAR_REF]],
@ -110,6 +110,8 @@ int main() {
// CHECK: call {{.*}} [[ST_TY_DEFAULT_CONSTR]]([[ST_TY]]* [[ST_TY_TEMP:%.+]])
// CHECK: call {{.*}} [[S_FLOAT_TY_COPY_CONSTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]], [[S_FLOAT_TY]]* {{.*}} [[VAR_REF]], [[ST_TY]]* [[ST_TY_TEMP]])
// CHECK: call {{.*}} [[ST_TY_DESTR]]([[ST_TY]]* [[ST_TY_TEMP]])
// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}** [[GTID_ADDR_ADDR]]
// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}* [[GTID_REF]]
// CHECK: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
// CHECK-DAG: call {{.*}} [[S_FLOAT_TY_DESTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]])
// CHECK-DAG: call {{.*}} [[S_FLOAT_TY_DESTR]]([[S_FLOAT_TY]]*
@ -127,7 +129,7 @@ int main() {
// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}* [[GTID_ADDR]]
// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
// CHECK: [[T_VAR_PTR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 1
// CHECK: [[T_VAR_REF:%.+]] = load i{{[0-9]+}}** [[T_VAR_PTR_REF]],
// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}* [[T_VAR_REF]],
@ -163,6 +165,8 @@ int main() {
// CHECK: call {{.*}} [[ST_TY_DEFAULT_CONSTR]]([[ST_TY]]* [[ST_TY_TEMP:%.+]])
// CHECK: call {{.*}} [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]* {{.*}} [[VAR_REF]], [[ST_TY]]* [[ST_TY_TEMP]])
// CHECK: call {{.*}} [[ST_TY_DESTR]]([[ST_TY]]* [[ST_TY_TEMP]])
// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}** [[GTID_ADDR_ADDR]]
// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}* [[GTID_REF]]
// CHECK: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
// CHECK-DAG: call {{.*}} [[S_INT_TY_DESTR]]([[S_INT_TY]]* [[VAR_PRIV]])
// CHECK-DAG: call {{.*}} [[S_INT_TY_DESTR]]([[S_INT_TY]]*