2014-05-06 18:08:46 +08:00
|
|
|
//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
|
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// This contains code to emit OpenMP nodes as LLVM code.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "CGOpenMPRuntime.h"
|
|
|
|
#include "CodeGenFunction.h"
|
|
|
|
#include "CodeGenModule.h"
|
2015-01-14 19:29:14 +08:00
|
|
|
#include "TargetInfo.h"
|
2014-05-06 18:08:46 +08:00
|
|
|
#include "clang/AST/Stmt.h"
|
|
|
|
#include "clang/AST/StmtOpenMP.h"
|
|
|
|
using namespace clang;
|
|
|
|
using namespace CodeGen;
|
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// OpenMP Directive Emission
|
|
|
|
//===----------------------------------------------------------------------===//
|
[OPENMP] Codegen for 'if' clause in 'parallel' directive.
Adds codegen for 'if' clause. Currently only for 'if' clause used with the 'parallel' directive.
If the condition evaluates to true, the code executes the parallel version by calling __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/), where loc is the debug location, 1 is the number of additional parameters after the "microtask" argument, microtask is the outlined function for the code associated with the 'parallel' directive, and captured_struct is the list of variables captured in this outlined function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
global_thread_id.addr = alloca i32
store i32 global_thread_id, global_thread_id.addr
zero.addr = alloca i32
store i32 0, zero.addr
kmpc_serialized_parallel(loc, global_thread_id);
microtask(global_thread_id.addr, zero.addr, captured_struct/*context*/);
kmpc_end_serialized_parallel(loc, global_thread_id);
Where loc - debug location, global_thread_id - global thread id, returned by __kmpc_global_thread_num() call or passed as a first parameter in microtask() call, global_thread_id.addr - address of the variable, where stored global_thread_id value, zero.addr - implicit bound thread id (should be set to 0 for serial call), microtask() and captured_struct are the same as in parallel call.
Also this patch checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D4716
llvm-svn: 219597
2014-10-13 14:02:40 +08:00
|
|
|
/// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
|
|
|
|
/// function. Here is the logic:
|
|
|
|
/// if (Cond) {
|
|
|
|
/// CodeGen(true);
|
|
|
|
/// } else {
|
|
|
|
/// CodeGen(false);
|
|
|
|
/// }
|
|
|
|
static void EmitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
|
|
|
|
const std::function<void(bool)> &CodeGen) {
|
|
|
|
CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
|
|
|
|
|
|
|
|
// If the condition constant folds and can be elided, try to avoid emitting
|
|
|
|
// the condition and the dead arm of the if/else.
|
|
|
|
bool CondConstant;
|
|
|
|
if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
|
|
|
|
CodeGen(CondConstant);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Otherwise, the condition did not fold, or we couldn't elide it. Just
|
|
|
|
// emit the conditional branch.
|
|
|
|
auto ThenBlock = CGF.createBasicBlock(/*name*/ "omp_if.then");
|
|
|
|
auto ElseBlock = CGF.createBasicBlock(/*name*/ "omp_if.else");
|
|
|
|
auto ContBlock = CGF.createBasicBlock(/*name*/ "omp_if.end");
|
|
|
|
CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount*/ 0);
|
|
|
|
|
|
|
|
// Emit the 'then' code.
|
|
|
|
CGF.EmitBlock(ThenBlock);
|
|
|
|
CodeGen(/*ThenBlock*/ true);
|
|
|
|
CGF.EmitBranch(ContBlock);
|
|
|
|
// Emit the 'else' code if present.
|
|
|
|
{
|
|
|
|
// There is no need to emit line number for unconditional branch.
|
2015-02-04 04:00:54 +08:00
|
|
|
auto NL = ApplyDebugLocation::CreateEmpty(CGF);
|
[OPENMP] Codegen for 'if' clause in 'parallel' directive.
Adds codegen for 'if' clause. Currently only for 'if' clause used with the 'parallel' directive.
If condition evaluates to true, the code executes parallel version of the code by calling __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/), where loc - debug location, 1 - number of additional parameters after "microtask" argument, microtask - is outlined finction for the code associated with the 'parallel' directive, captured_struct - list of variables captured in this outlined function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
global_thread_id.addr = alloca i32
store i32 global_thread_id, global_thread_id.addr
zero.addr = alloca i32
store i32 0, zero.addr
kmpc_serialized_parallel(loc, global_thread_id);
microtask(global_thread_id.addr, zero.addr, captured_struct/*context*/);
kmpc_end_serialized_parallel(loc, global_thread_id);
Where loc - debug location, global_thread_id - global thread id, returned by __kmpc_global_thread_num() call or passed as a first parameter in microtask() call, global_thread_id.addr - address of the variable, where stored global_thread_id value, zero.addr - implicit bound thread id (should be set to 0 for serial call), microtask() and captured_struct are the same as in parallel call.
Also this patch checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D4716
llvm-svn: 219597
2014-10-13 14:02:40 +08:00
|
|
|
CGF.EmitBlock(ElseBlock);
|
|
|
|
}
|
|
|
|
CodeGen(/*ThenBlock*/ false);
|
|
|
|
{
|
|
|
|
// There is no need to emit line number for unconditional branch.
|
2015-02-04 04:00:54 +08:00
|
|
|
auto NL = ApplyDebugLocation::CreateEmpty(CGF);
|
[OPENMP] Codegen for 'if' clause in 'parallel' directive.
Adds codegen for 'if' clause. Currently only for 'if' clause used with the 'parallel' directive.
If condition evaluates to true, the code executes parallel version of the code by calling __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/), where loc - debug location, 1 - number of additional parameters after "microtask" argument, microtask - is outlined finction for the code associated with the 'parallel' directive, captured_struct - list of variables captured in this outlined function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
global_thread_id.addr = alloca i32
store i32 global_thread_id, global_thread_id.addr
zero.addr = alloca i32
store i32 0, zero.addr
kmpc_serialized_parallel(loc, global_thread_id);
microtask(global_thread_id.addr, zero.addr, captured_struct/*context*/);
kmpc_end_serialized_parallel(loc, global_thread_id);
Where loc - debug location, global_thread_id - global thread id, returned by __kmpc_global_thread_num() call or passed as a first parameter in microtask() call, global_thread_id.addr - address of the variable, where stored global_thread_id value, zero.addr - implicit bound thread id (should be set to 0 for serial call), microtask() and captured_struct are the same as in parallel call.
Also this patch checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D4716
llvm-svn: 219597
2014-10-13 14:02:40 +08:00
|
|
|
CGF.EmitBranch(ContBlock);
|
|
|
|
}
|
|
|
|
// Emit the continuation block for code after the if.
|
|
|
|
CGF.EmitBlock(ContBlock, /*IsFinished*/ true);
|
|
|
|
}
|
|
|
|
|
2014-10-08 22:01:46 +08:00
|
|
|
/// \brief Emits the initialization of a private aggregate copy from the
/// original variable.
///
/// \param OriginalAddr LValue of the original variable; its address is the
/// source of the copy.
/// \param PrivateAddr Address of the private copy (the destination).
/// \param AssignExpr Initializer expression. If it is not a
/// CXXConstructExpr, or it is a trivial initializer, a plain aggregate
/// assignment is emitted; otherwise it is emitted once per array element.
/// \param OriginalType Type of the original variable, used as an array type to
/// compute the number of elements.
/// \param VDInit Variable that is temporarily mapped to the current source
/// element while \a AssignExpr is emitted for that element.
void CodeGenFunction::EmitOMPAggregateAssign(LValue OriginalAddr,
                                             llvm::Value *PrivateAddr,
                                             const Expr *AssignExpr,
                                             QualType OriginalType,
                                             const VarDecl *VDInit) {
  EmitBlock(createBasicBlock(".omp.assign.begin."));

  const bool NeedsElementwiseInit =
      isa<CXXConstructExpr>(AssignExpr) && !isTrivialInitializer(AssignExpr);
  if (NeedsElementwiseInit) {
    // Initialize the destination one element at a time.
    QualType ElementTy;
    auto DestBegin = PrivateAddr;
    auto SrcBegin = OriginalAddr.getAddress();
    auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
    auto SrcNumElements = emitArrayLength(ArrayTy, ElementTy, SrcBegin);
    auto DestNumElements = emitArrayLength(ArrayTy, ElementTy, DestBegin);
    auto *SrcEnd = Builder.CreateGEP(SrcBegin, SrcNumElements);
    auto *DestEnd = Builder.CreateGEP(DestBegin, DestNumElements);

    // The loop runs from the past-the-end element back to the first one. An
    // explicit emptiness check in front of it skips the body entirely when
    // the destination range is empty.
    auto *BodyBB = createBasicBlock("omp.arraycpy.body");
    auto *DoneBB = createBasicBlock("omp.arraycpy.done");
    auto *IsEmpty =
        Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
    Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

    // Enter the loop body; the PHI nodes carry the current past-the-end
    // pointers and start out at the end of each array.
    auto *PreheaderBB = Builder.GetInsertBlock();
    EmitBlock(BodyBB);
    auto *SrcElementPast = Builder.CreatePHI(SrcBegin->getType(), 2,
                                             "omp.arraycpy.srcElementPast");
    SrcElementPast->addIncoming(SrcEnd, PreheaderBB);
    auto *DestElementPast = Builder.CreatePHI(DestBegin->getType(), 2,
                                              "omp.arraycpy.destElementPast");
    DestElementPast->addIncoming(DestEnd, PreheaderBB);

    // Step both pointers back by one element to obtain the current element.
    auto *MinusOne = llvm::ConstantInt::get(SizeTy, -1, true);
    auto *DestElement = Builder.CreateGEP(DestElementPast, MinusOne,
                                          "omp.arraycpy.dest.element");
    auto *SrcElement = Builder.CreateGEP(SrcElementPast, MinusOne,
                                         "omp.arraycpy.src.element");
    {
      // Temporaries created while initializing one element are cleaned up
      // when this scope ends.
      CodeGenFunction::RunCleanupsScope ElementInitScope(*this);
      // While the initializer is emitted, VDInit refers to the current source
      // element; the mapping is removed again right afterwards.
      LocalDeclMap[VDInit] = SrcElement;
      EmitAnyExprToMem(AssignExpr, DestElement,
                       AssignExpr->getType().getQualifiers(),
                       /*IsInitializer*/ false);
      LocalDeclMap.erase(VDInit);
    }

    // Stop once the first destination element has been processed; otherwise
    // go around again with the shifted pointers.
    auto *ReachedBegin =
        Builder.CreateICmpEQ(DestElement, DestBegin, "omp.arraycpy.done");
    Builder.CreateCondBr(ReachedBegin, DoneBB, BodyBB);
    DestElementPast->addIncoming(DestElement, Builder.GetInsertBlock());
    SrcElementPast->addIncoming(SrcElement, Builder.GetInsertBlock());

    // Loop exit.
    EmitBlock(DoneBB, /*IsFinished*/ true);
  } else {
    // A simple aggregate copy is sufficient.
    EmitAggregateAssign(PrivateAddr, OriginalAddr.getAddress(),
                        AssignExpr->getType());
  }

  EmitBlock(createBasicBlock(".omp.assign.end."));
}
|
|
|
|
|
|
|
|
void CodeGenFunction::EmitOMPFirstprivateClause(
|
|
|
|
const OMPExecutableDirective &D,
|
2014-10-10 17:48:26 +08:00
|
|
|
CodeGenFunction::OMPPrivateScope &PrivateScope) {
|
2014-10-08 22:01:46 +08:00
|
|
|
auto PrivateFilter = [](const OMPClause *C) -> bool {
|
|
|
|
return C->getClauseKind() == OMPC_firstprivate;
|
|
|
|
};
|
|
|
|
for (OMPExecutableDirective::filtered_clause_iterator<decltype(PrivateFilter)>
|
|
|
|
I(D.clauses(), PrivateFilter); I; ++I) {
|
|
|
|
auto *C = cast<OMPFirstprivateClause>(*I);
|
|
|
|
auto IRef = C->varlist_begin();
|
|
|
|
auto InitsRef = C->inits().begin();
|
|
|
|
for (auto IInit : C->private_copies()) {
|
2014-10-10 17:48:26 +08:00
|
|
|
auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
|
|
|
|
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
|
|
|
|
bool IsRegistered;
|
2014-10-08 22:01:46 +08:00
|
|
|
if (*InitsRef != nullptr) {
|
|
|
|
// Emit VarDecl with copy init for arrays.
|
2014-10-10 17:48:26 +08:00
|
|
|
auto *FD = CapturedStmtInfo->lookup(OrigVD);
|
2014-10-08 22:01:46 +08:00
|
|
|
LValue Base = MakeNaturalAlignAddrLValue(
|
|
|
|
CapturedStmtInfo->getContextValue(),
|
|
|
|
getContext().getTagDeclType(FD->getParent()));
|
|
|
|
auto OriginalAddr = EmitLValueForField(Base, FD);
|
|
|
|
auto VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
|
2014-10-10 17:48:26 +08:00
|
|
|
IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
|
|
|
|
auto Emission = EmitAutoVarAlloca(*VD);
|
|
|
|
// Emit initialization of aggregate firstprivate vars.
|
|
|
|
EmitOMPAggregateAssign(OriginalAddr, Emission.getAllocatedAddress(),
|
|
|
|
VD->getInit(), (*IRef)->getType(), VDInit);
|
|
|
|
EmitAutoVarCleanups(Emission);
|
|
|
|
return Emission.getAllocatedAddress();
|
|
|
|
});
|
2014-10-08 22:01:46 +08:00
|
|
|
} else
|
2014-10-10 17:48:26 +08:00
|
|
|
IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
|
|
|
|
// Emit private VarDecl with copy init.
|
|
|
|
EmitDecl(*VD);
|
|
|
|
return GetAddrOfLocalVar(VD);
|
|
|
|
});
|
2015-03-16 15:14:41 +08:00
|
|
|
assert(IsRegistered && "firstprivate var already registered as private");
|
2014-10-10 17:48:26 +08:00
|
|
|
// Silence the warning about unused variable.
|
|
|
|
(void)IsRegistered;
|
2014-10-08 22:01:46 +08:00
|
|
|
++IRef, ++InitsRef;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-10-21 11:16:40 +08:00
|
|
|
void CodeGenFunction::EmitOMPPrivateClause(
|
|
|
|
const OMPExecutableDirective &D,
|
|
|
|
CodeGenFunction::OMPPrivateScope &PrivateScope) {
|
|
|
|
auto PrivateFilter = [](const OMPClause *C) -> bool {
|
|
|
|
return C->getClauseKind() == OMPC_private;
|
|
|
|
};
|
|
|
|
for (OMPExecutableDirective::filtered_clause_iterator<decltype(PrivateFilter)>
|
|
|
|
I(D.clauses(), PrivateFilter); I; ++I) {
|
|
|
|
auto *C = cast<OMPPrivateClause>(*I);
|
|
|
|
auto IRef = C->varlist_begin();
|
|
|
|
for (auto IInit : C->private_copies()) {
|
|
|
|
auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
|
|
|
|
auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
|
|
|
|
bool IsRegistered =
|
|
|
|
PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
|
|
|
|
// Emit private VarDecl with copy init.
|
|
|
|
EmitDecl(*VD);
|
|
|
|
return GetAddrOfLocalVar(VD);
|
|
|
|
});
|
2015-03-16 15:14:41 +08:00
|
|
|
assert(IsRegistered && "private var already registered as private");
|
2014-10-21 11:16:40 +08:00
|
|
|
// Silence the warning about unused variable.
|
|
|
|
(void)IsRegistered;
|
|
|
|
++IRef;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit code for the reduction clause. The following code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of private variable: they have private copies, but their initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
void CodeGenFunction::EmitOMPReductionClauseInit(
|
|
|
|
const OMPExecutableDirective &D,
|
|
|
|
CodeGenFunction::OMPPrivateScope &PrivateScope) {
|
|
|
|
auto ReductionFilter = [](const OMPClause *C) -> bool {
|
|
|
|
return C->getClauseKind() == OMPC_reduction;
|
|
|
|
};
|
|
|
|
for (OMPExecutableDirective::filtered_clause_iterator<decltype(
|
|
|
|
ReductionFilter)> I(D.clauses(), ReductionFilter);
|
|
|
|
I; ++I) {
|
|
|
|
auto *C = cast<OMPReductionClause>(*I);
|
|
|
|
auto ILHS = C->lhs_exprs().begin();
|
|
|
|
auto IRHS = C->rhs_exprs().begin();
|
|
|
|
for (auto IRef : C->varlists()) {
|
|
|
|
auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
|
|
|
|
auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
|
|
|
|
auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
|
|
|
|
// Store the address of the original variable associated with the LHS
|
|
|
|
// implicit variable.
|
|
|
|
PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef]() -> llvm::Value *{
|
|
|
|
DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
|
|
|
|
CapturedStmtInfo->lookup(OrigVD) != nullptr,
|
|
|
|
IRef->getType(), VK_LValue, IRef->getExprLoc());
|
|
|
|
return EmitLValue(&DRE).getAddress();
|
|
|
|
});
|
|
|
|
// Emit reduction copy.
|
|
|
|
bool IsRegistered =
|
|
|
|
PrivateScope.addPrivate(OrigVD, [this, PrivateVD]() -> llvm::Value *{
|
|
|
|
// Emit private VarDecl with reduction init.
|
|
|
|
EmitDecl(*PrivateVD);
|
|
|
|
return GetAddrOfLocalVar(PrivateVD);
|
|
|
|
});
|
|
|
|
assert(IsRegistered && "private var already registered as private");
|
|
|
|
// Silence the warning about unused variable.
|
|
|
|
(void)IsRegistered;
|
|
|
|
++ILHS, ++IRHS;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void CodeGenFunction::EmitOMPReductionClauseFinal(
|
|
|
|
const OMPExecutableDirective &D) {
|
|
|
|
llvm::SmallVector<const Expr *, 8> LHSExprs;
|
|
|
|
llvm::SmallVector<const Expr *, 8> RHSExprs;
|
|
|
|
llvm::SmallVector<const Expr *, 8> ReductionOps;
|
|
|
|
auto ReductionFilter = [](const OMPClause *C) -> bool {
|
|
|
|
return C->getClauseKind() == OMPC_reduction;
|
|
|
|
};
|
|
|
|
bool HasAtLeastOneReduction = false;
|
|
|
|
for (OMPExecutableDirective::filtered_clause_iterator<decltype(
|
|
|
|
ReductionFilter)> I(D.clauses(), ReductionFilter);
|
|
|
|
I; ++I) {
|
|
|
|
HasAtLeastOneReduction = true;
|
|
|
|
auto *C = cast<OMPReductionClause>(*I);
|
|
|
|
LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
|
|
|
|
RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
|
|
|
|
ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
|
|
|
|
}
|
|
|
|
if (HasAtLeastOneReduction) {
|
|
|
|
// Emit nowait reduction if nowait clause is present or directive is a
|
|
|
|
// parallel directive (it always has implicit barrier).
|
|
|
|
CGM.getOpenMPRuntime().emitReduction(
|
|
|
|
*this, D.getLocEnd(), LHSExprs, RHSExprs, ReductionOps,
|
|
|
|
D.getSingleClause(OMPC_nowait) ||
|
|
|
|
isOpenMPParallelDirective(D.getDirectiveKind()));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-10-13 16:23:51 +08:00
|
|
|
/// \brief Emits code for OpenMP parallel directive in the parallel region.
|
2015-04-10 12:50:10 +08:00
|
|
|
static void emitOMPParallelCall(CodeGenFunction &CGF,
|
|
|
|
const OMPExecutableDirective &S,
|
2014-10-13 16:23:51 +08:00
|
|
|
llvm::Value *OutlinedFn,
|
|
|
|
llvm::Value *CapturedStruct) {
|
|
|
|
if (auto C = S.getSingleClause(/*K*/ OMPC_num_threads)) {
|
|
|
|
CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
|
|
|
|
auto NumThreadsClause = cast<OMPNumThreadsClause>(C);
|
|
|
|
auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
|
|
|
|
/*IgnoreResultAssign*/ true);
|
2015-02-25 16:32:46 +08:00
|
|
|
CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
|
2014-10-13 16:23:51 +08:00
|
|
|
CGF, NumThreads, NumThreadsClause->getLocStart());
|
|
|
|
}
|
2015-02-25 16:32:46 +08:00
|
|
|
CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
|
|
|
|
CapturedStruct);
|
2014-10-13 16:23:51 +08:00
|
|
|
}
|
|
|
|
|
2015-04-10 12:50:10 +08:00
|
|
|
static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
|
|
|
|
const OMPExecutableDirective &S,
|
|
|
|
const RegionCodeGenTy &CodeGen) {
|
2014-10-10 20:19:54 +08:00
|
|
|
auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
|
2015-04-10 12:50:10 +08:00
|
|
|
auto CapturedStruct = CGF.GenerateCapturedStmtArgument(*CS);
|
|
|
|
auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
|
|
|
|
S, *CS->getCapturedDecl()->param_begin(), CodeGen);
|
[OPENMP] Codegen for 'if' clause in 'parallel' directive.
Adds codegen for 'if' clause. Currently only for 'if' clause used with the 'parallel' directive.
If condition evaluates to true, the code executes parallel version of the code by calling __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/), where loc - debug location, 1 - number of additional parameters after "microtask" argument, microtask - is outlined finction for the code associated with the 'parallel' directive, captured_struct - list of variables captured in this outlined function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
global_thread_id.addr = alloca i32
store i32 global_thread_id, global_thread_id.addr
zero.addr = alloca i32
store i32 0, zero.addr
kmpc_serialized_parallel(loc, global_thread_id);
microtask(global_thread_id.addr, zero.addr, captured_struct/*context*/);
kmpc_end_serialized_parallel(loc, global_thread_id);
Where loc - debug location, global_thread_id - global thread id, returned by __kmpc_global_thread_num() call or passed as a first parameter in microtask() call, global_thread_id.addr - address of the variable, where stored global_thread_id value, zero.addr - implicit bound thread id (should be set to 0 for serial call), microtask() and captured_struct are the same as in parallel call.
Also this patch checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D4716
llvm-svn: 219597
2014-10-13 14:02:40 +08:00
|
|
|
if (auto C = S.getSingleClause(/*K*/ OMPC_if)) {
|
|
|
|
auto Cond = cast<OMPIfClause>(C)->getCondition();
|
2015-04-10 12:50:10 +08:00
|
|
|
EmitOMPIfClause(CGF, Cond, [&](bool ThenBlock) {
|
[OPENMP] Codegen for 'if' clause in 'parallel' directive.
Adds codegen for 'if' clause. Currently only for 'if' clause used with the 'parallel' directive.
If condition evaluates to true, the code executes parallel version of the code by calling __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/), where loc - debug location, 1 - number of additional parameters after "microtask" argument, microtask - is outlined finction for the code associated with the 'parallel' directive, captured_struct - list of variables captured in this outlined function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
global_thread_id.addr = alloca i32
store i32 global_thread_id, global_thread_id.addr
zero.addr = alloca i32
store i32 0, zero.addr
kmpc_serialized_parallel(loc, global_thread_id);
microtask(global_thread_id.addr, zero.addr, captured_struct/*context*/);
kmpc_end_serialized_parallel(loc, global_thread_id);
Where loc - debug location, global_thread_id - global thread id, returned by __kmpc_global_thread_num() call or passed as a first parameter in microtask() call, global_thread_id.addr - address of the variable, where stored global_thread_id value, zero.addr - implicit bound thread id (should be set to 0 for serial call), microtask() and captured_struct are the same as in parallel call.
Also this patch checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D4716
llvm-svn: 219597
2014-10-13 14:02:40 +08:00
|
|
|
if (ThenBlock)
|
2015-04-10 12:50:10 +08:00
|
|
|
emitOMPParallelCall(CGF, S, OutlinedFn, CapturedStruct);
|
[OPENMP] Codegen for 'if' clause in 'parallel' directive.
Adds codegen for 'if' clause. Currently only for 'if' clause used with the 'parallel' directive.
If condition evaluates to true, the code executes parallel version of the code by calling __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/), where loc - debug location, 1 - number of additional parameters after "microtask" argument, microtask - is outlined finction for the code associated with the 'parallel' directive, captured_struct - list of variables captured in this outlined function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
global_thread_id.addr = alloca i32
store i32 global_thread_id, global_thread_id.addr
zero.addr = alloca i32
store i32 0, zero.addr
kmpc_serialized_parallel(loc, global_thread_id);
microtask(global_thread_id.addr, zero.addr, captured_struct/*context*/);
kmpc_end_serialized_parallel(loc, global_thread_id);
Where loc - debug location, global_thread_id - global thread id, returned by __kmpc_global_thread_num() call or passed as a first parameter in microtask() call, global_thread_id.addr - address of the variable, where stored global_thread_id value, zero.addr - implicit bound thread id (should be set to 0 for serial call), microtask() and captured_struct are the same as in parallel call.
Also this patch checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D4716
llvm-svn: 219597
2014-10-13 14:02:40 +08:00
|
|
|
else
|
2015-04-10 12:50:10 +08:00
|
|
|
CGF.CGM.getOpenMPRuntime().emitSerialCall(CGF, S.getLocStart(),
|
|
|
|
OutlinedFn, CapturedStruct);
|
[OPENMP] Codegen for 'if' clause in 'parallel' directive.
Adds codegen for 'if' clause. Currently only for 'if' clause used with the 'parallel' directive.
If condition evaluates to true, the code executes parallel version of the code by calling __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/), where loc - debug location, 1 - number of additional parameters after "microtask" argument, microtask - is outlined finction for the code associated with the 'parallel' directive, captured_struct - list of variables captured in this outlined function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
global_thread_id.addr = alloca i32
store i32 global_thread_id, global_thread_id.addr
zero.addr = alloca i32
store i32 0, zero.addr
kmpc_serialized_parallel(loc, global_thread_id);
microtask(global_thread_id.addr, zero.addr, captured_struct/*context*/);
kmpc_end_serialized_parallel(loc, global_thread_id);
Where loc - debug location, global_thread_id - global thread id, returned by __kmpc_global_thread_num() call or passed as a first parameter in microtask() call, global_thread_id.addr - address of the variable, where stored global_thread_id value, zero.addr - implicit bound thread id (should be set to 0 for serial call), microtask() and captured_struct are the same as in parallel call.
Also this patch checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D4716
llvm-svn: 219597
2014-10-13 14:02:40 +08:00
|
|
|
});
|
2014-10-13 16:23:51 +08:00
|
|
|
} else
|
2015-04-10 12:50:10 +08:00
|
|
|
emitOMPParallelCall(CGF, S, OutlinedFn, CapturedStruct);
|
|
|
|
}
|
|
|
|
|
|
|
|
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
|
|
|
|
LexicalScope Scope(*this, S.getSourceRange());
|
|
|
|
// Emit parallel region as a standalone region.
|
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
|
|
|
|
OMPPrivateScope PrivateScope(CGF);
|
|
|
|
CGF.EmitOMPPrivateClause(S, PrivateScope);
|
|
|
|
CGF.EmitOMPFirstprivateClause(S, PrivateScope);
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
|
2015-04-10 12:50:10 +08:00
|
|
|
if (PrivateScope.Privatize())
|
|
|
|
// Emit implicit barrier to synchronize threads and avoid data races.
|
|
|
|
CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
|
|
|
|
OMPD_unknown);
|
|
|
|
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
CGF.EmitOMPReductionClauseFinal(S);
|
2015-04-10 12:50:10 +08:00
|
|
|
// Emit implicit barrier at the end of the 'parallel' directive.
|
|
|
|
CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
|
|
|
|
OMPD_unknown);
|
|
|
|
};
|
|
|
|
emitCommonOMPParallelDirective(*this, S, CodeGen);
|
2014-05-06 18:08:46 +08:00
|
|
|
}
|
2014-05-22 16:54:05 +08:00
|
|
|
|
2014-10-07 16:57:09 +08:00
|
|
|
void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &S,
|
2014-10-01 14:03:56 +08:00
|
|
|
bool SeparateIter) {
|
|
|
|
RunCleanupsScope BodyScope(*this);
|
|
|
|
// Update counters values on current iteration.
|
|
|
|
for (auto I : S.updates()) {
|
|
|
|
EmitIgnoredExpr(I);
|
|
|
|
}
|
2015-03-21 18:12:56 +08:00
|
|
|
// Update the linear variables.
|
|
|
|
for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
|
|
|
|
for (auto U : C->updates()) {
|
|
|
|
EmitIgnoredExpr(U);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-10-01 14:03:56 +08:00
|
|
|
// On a continue in the body, jump to the end.
|
2014-10-07 16:57:09 +08:00
|
|
|
auto Continue = getJumpDestInCurrentScope("omp.body.continue");
|
2014-10-01 14:03:56 +08:00
|
|
|
BreakContinueStack.push_back(BreakContinue(JumpDest(), Continue));
|
|
|
|
// Emit loop body.
|
|
|
|
EmitStmt(S.getBody());
|
|
|
|
// The end (updates/cleanups).
|
|
|
|
EmitBlock(Continue.getBlock());
|
|
|
|
BreakContinueStack.pop_back();
|
|
|
|
if (SeparateIter) {
|
|
|
|
// TODO: Update lastprivates if the SeparateIter flag is true.
|
|
|
|
// This will be implemented in a follow-up OMPLastprivateClause patch, but
|
|
|
|
// result should be still correct without it, as we do not make these
|
|
|
|
// variables private yet.
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-04-10 12:50:10 +08:00
|
|
|
void CodeGenFunction::EmitOMPInnerLoop(
|
|
|
|
const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
|
|
|
|
const Expr *IncExpr,
|
|
|
|
const llvm::function_ref<void(CodeGenFunction &)> &BodyGen) {
|
2014-10-07 16:57:09 +08:00
|
|
|
auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
|
2014-10-01 14:03:56 +08:00
|
|
|
auto Cnt = getPGORegionCounter(&S);
|
|
|
|
|
|
|
|
// Start the loop with a block that tests the condition.
|
2014-10-07 16:57:09 +08:00
|
|
|
auto CondBlock = createBasicBlock("omp.inner.for.cond");
|
2014-10-01 14:03:56 +08:00
|
|
|
EmitBlock(CondBlock);
|
|
|
|
LoopStack.push(CondBlock);
|
|
|
|
|
|
|
|
// If there are any cleanups between here and the loop-exit scope,
|
|
|
|
// create a block to stage a loop exit along.
|
|
|
|
auto ExitBlock = LoopExit.getBlock();
|
[OPENMP] Initial codegen for 'omp sections' and 'omp section' directives.
If only one section is found in the sections region, it is emitted just like single region.
Otherwise it is emitted as a static non-chunked loop.
#pragma omp sections
{
#pragma omp section
{1}
...
#pragma omp section
{n}
}
is translated to something like
i32 <iter_var>
i32 <last_iter> = 0
i32 <lower_bound> = 0
i32 <upper_bound> = n-1
i32 <stride> = 1
call void @__kmpc_for_static_init_4(<loc>, i32 <gtid>, i32 34/*static non-chunked*/, i32* <last_iter>, i32* <lower_bound>, i32* <upper_bound>, i32* <stride>, i32 1/*increment always 1*/, i32 1/*chunk always 1*/)
<upper_bound> = min(<upper_bound>, n-1)
<iter_var> = <lb>
check:
br <iter_var> <= <upper_bound>, label cont, label exit
continue:
switch (IV) {
case 0:
{1};
break;
...
case <NumSection> - 1:
{n};
break;
}
++<iter_var>
br label check
exit:
call void @__kmpc_for_static_fini(<loc>, i32 <gtid>)
Differential Revision: http://reviews.llvm.org/D8244
llvm-svn: 232021
2015-03-12 16:53:29 +08:00
|
|
|
if (RequiresCleanup)
|
2014-10-07 16:57:09 +08:00
|
|
|
ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
|
2014-10-01 14:03:56 +08:00
|
|
|
|
2014-10-07 16:57:09 +08:00
|
|
|
auto LoopBody = createBasicBlock("omp.inner.for.body");
|
2014-10-01 14:03:56 +08:00
|
|
|
|
[OPENMP] Initial codegen for 'omp sections' and 'omp section' directives.
If only one section is found in the sections region, it is emitted just like single region.
Otherwise it is emitted as a static non-chunked loop.
#pragma omp sections
{
#pragma omp section
{1}
...
#pragma omp section
{n}
}
is translated to something like
i32 <iter_var>
i32 <last_iter> = 0
i32 <lower_bound> = 0
i32 <upper_bound> = n-1
i32 <stride> = 1
call void @__kmpc_for_static_init_4(<loc>, i32 <gtid>, i32 34/*static non-chunked*/, i32* <last_iter>, i32* <lower_bound>, i32* <upper_bound>, i32* <stride>, i32 1/*increment always 1*/, i32 1/*chunk always 1*/)
<upper_bound> = min(<upper_bound>, n-1)
<iter_var> = <lb>
check:
br <iter_var> <= <upper_bound>, label cont, label exit
continue:
switch (IV) {
case 0:
{1};
break;
...
case <NumSection> - 1:
{n};
break;
}
++<iter_var>
br label check
exit:
call void @__kmpc_for_static_fini(<loc>, i32 <gtid>)
Differential Revision: http://reviews.llvm.org/D8244
llvm-svn: 232021
2015-03-12 16:53:29 +08:00
|
|
|
// Emit condition.
|
|
|
|
EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, Cnt.getCount());
|
2014-10-01 14:03:56 +08:00
|
|
|
if (ExitBlock != LoopExit.getBlock()) {
|
|
|
|
EmitBlock(ExitBlock);
|
|
|
|
EmitBranchThroughCleanup(LoopExit);
|
|
|
|
}
|
|
|
|
|
|
|
|
EmitBlock(LoopBody);
|
|
|
|
Cnt.beginRegion(Builder);
|
|
|
|
|
|
|
|
// Create a block for the increment.
|
2014-10-07 16:57:09 +08:00
|
|
|
auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
|
2014-10-01 14:03:56 +08:00
|
|
|
BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
|
|
|
|
|
2015-04-10 12:50:10 +08:00
|
|
|
BodyGen(*this);
|
2014-10-01 14:03:56 +08:00
|
|
|
|
|
|
|
// Emit "IV = IV + 1" and a back-edge to the condition block.
|
|
|
|
EmitBlock(Continue.getBlock());
|
[OPENMP] Initial codegen for 'omp sections' and 'omp section' directives.
If only one section is found in the sections region, it is emitted just like single region.
Otherwise it is emitted as a static non-chunked loop.
#pragma omp sections
{
#pragma omp section
{1}
...
#pragma omp section
{n}
}
is translated to something like
i32 <iter_var>
i32 <last_iter> = 0
i32 <lower_bound> = 0
i32 <upper_bound> = n-1
i32 <stride> = 1
call void @__kmpc_for_static_init_4(<loc>, i32 <gtid>, i32 34/*static non-chunked*/, i32* <last_iter>, i32* <lower_bound>, i32* <upper_bound>, i32* <stride>, i32 1/*increment always 1*/, i32 1/*chunk always 1*/)
<upper_bound> = min(<upper_bound>, n-1)
<iter_var> = <lb>
check:
br <iter_var> <= <upper_bound>, label cont, label exit
continue:
switch (IV) {
case 0:
{1};
break;
...
case <NumSection> - 1:
{n};
break;
}
++<iter_var>
br label check
exit:
call void @__kmpc_for_static_fini(<loc>, i32 <gtid>)
Differential Revision: http://reviews.llvm.org/D8244
llvm-svn: 232021
2015-03-12 16:53:29 +08:00
|
|
|
EmitIgnoredExpr(IncExpr);
|
2014-10-01 14:03:56 +08:00
|
|
|
BreakContinueStack.pop_back();
|
|
|
|
EmitBranch(CondBlock);
|
|
|
|
LoopStack.pop();
|
|
|
|
// Emit the fall-through block.
|
|
|
|
EmitBlock(LoopExit.getBlock());
|
|
|
|
}
|
|
|
|
|
|
|
|
void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &S) {
|
|
|
|
auto IC = S.counters().begin();
|
|
|
|
for (auto F : S.finals()) {
|
|
|
|
if (LocalDeclMap.lookup(cast<DeclRefExpr>((*IC))->getDecl())) {
|
|
|
|
EmitIgnoredExpr(F);
|
|
|
|
}
|
|
|
|
++IC;
|
|
|
|
}
|
2015-03-21 18:12:56 +08:00
|
|
|
// Emit the final values of the linear variables.
|
|
|
|
for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
|
|
|
|
for (auto F : C->finals()) {
|
|
|
|
EmitIgnoredExpr(F);
|
|
|
|
}
|
|
|
|
}
|
2014-10-01 14:03:56 +08:00
|
|
|
}
|
|
|
|
|
2014-09-30 13:29:28 +08:00
|
|
|
/// Emits an alignment assumption for every variable listed in an 'aligned'
/// clause. The alignment comes from the clause's alignment expression when
/// one is present; otherwise the target's default SIMD alignment for the
/// variable's type is used. Nothing is emitted when the resulting alignment
/// is zero.
static void EmitOMPAlignedClause(CodeGenFunction &CGF, CodeGenModule &CGM,
                                 const OMPAlignedClause &Clause) {
  // Zero means "no alignment given in the clause".
  unsigned ExplicitAlignment = 0;
  if (auto AlignExpr = Clause.getAlignment()) {
    auto AlignConst = cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignExpr));
    ExplicitAlignment = static_cast<unsigned>(AlignConst->getZExtValue());
  }

  for (auto VarExpr : Clause.varlists()) {
    unsigned Alignment = ExplicitAlignment;
    // OpenMP [2.8.1, Description]
    // If no optional parameter is specified, implementation-defined default
    // alignments for SIMD instructions on the target platforms are assumed.
    if (Alignment == 0)
      Alignment = CGM.getTargetCodeGenInfo().getOpenMPSimdDefaultAlignment(
          VarExpr->getType());
    assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
           "alignment is not power of 2");
    // Still zero: there is nothing to assume for this variable.
    if (Alignment == 0)
      continue;
    llvm::Value *Ptr = CGF.EmitScalarExpr(VarExpr);
    CGF.EmitAlignmentAssumption(Ptr, Alignment);
  }
}
|
|
|
|
|
2014-10-10 17:48:26 +08:00
|
|
|
/// Registers a private copy of every loop counter in \p LoopScope. Each
/// private copy is an uninitialized automatic variable allocated (with its
/// cleanups) by the generator passed to addPrivate().
static void EmitPrivateLoopCounters(CodeGenFunction &CGF,
                                    CodeGenFunction::OMPPrivateScope &LoopScope,
                                    ArrayRef<Expr *> Counters) {
  for (auto *CounterRef : Counters) {
    const VarDecl *CounterVD =
        cast<VarDecl>(cast<DeclRefExpr>(CounterRef)->getDecl());
    // The generator allocates the variable but does not initialize it.
    bool Added = LoopScope.addPrivate(CounterVD, [&]() -> llvm::Value * {
      auto Emission = CGF.EmitAutoVarAlloca(*CounterVD);
      CGF.EmitAutoVarCleanups(Emission);
      return Emission.getAllocatedAddress();
    });
    assert(Added && "counter already registered as private");
    // Keeps builds without assertions free of an unused-variable warning.
    (void)Added;
  }
}
|
|
|
|
|
2015-03-21 18:12:56 +08:00
|
|
|
static void
|
|
|
|
EmitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D,
|
|
|
|
CodeGenFunction::OMPPrivateScope &PrivateScope) {
|
|
|
|
for (auto Clause : OMPExecutableDirective::linear_filter(D.clauses())) {
|
|
|
|
for (auto *E : Clause->varlists()) {
|
|
|
|
auto VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
|
|
|
|
bool IsRegistered = PrivateScope.addPrivate(VD, [&]()->llvm::Value * {
|
|
|
|
// Emit var without initialization.
|
|
|
|
auto VarEmission = CGF.EmitAutoVarAlloca(*VD);
|
|
|
|
CGF.EmitAutoVarCleanups(VarEmission);
|
|
|
|
return VarEmission.getAllocatedAddress();
|
|
|
|
});
|
|
|
|
assert(IsRegistered && "linear var already registered as private");
|
|
|
|
// Silence the warning about unused variable.
|
|
|
|
(void)IsRegistered;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-05-22 16:54:05 +08:00
|
|
|
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
|
2015-04-10 12:50:10 +08:00
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
|
|
|
|
// Pragma 'simd' code depends on presence of 'lastprivate'.
|
|
|
|
// If present, we have to separate last iteration of the loop:
|
|
|
|
//
|
|
|
|
// if (LastIteration != 0) {
|
|
|
|
// for (IV in 0..LastIteration-1) BODY;
|
|
|
|
// BODY with updates of lastprivate vars;
|
|
|
|
// <Final counter/linear vars updates>;
|
|
|
|
// }
|
|
|
|
//
|
|
|
|
// otherwise (when there's no lastprivate):
|
|
|
|
//
|
|
|
|
// for (IV in 0..LastIteration) BODY;
|
|
|
|
// <Final counter/linear vars updates>;
|
|
|
|
//
|
|
|
|
|
|
|
|
// Walk clauses and process safelen/lastprivate.
|
|
|
|
bool SeparateIter = false;
|
|
|
|
CGF.LoopStack.setParallel();
|
|
|
|
CGF.LoopStack.setVectorizerEnable(true);
|
|
|
|
for (auto C : S.clauses()) {
|
|
|
|
switch (C->getClauseKind()) {
|
|
|
|
case OMPC_safelen: {
|
|
|
|
RValue Len = CGF.EmitAnyExpr(cast<OMPSafelenClause>(C)->getSafelen(),
|
|
|
|
AggValueSlot::ignored(), true);
|
|
|
|
llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
|
|
|
|
CGF.LoopStack.setVectorizerWidth(Val->getZExtValue());
|
|
|
|
// In presence of finite 'safelen', it may be unsafe to mark all
|
|
|
|
// the memory instructions parallel, because loop-carried
|
|
|
|
// dependences of 'safelen' iterations are possible.
|
|
|
|
CGF.LoopStack.setParallel(false);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OMPC_aligned:
|
|
|
|
EmitOMPAlignedClause(CGF, CGF.CGM, cast<OMPAlignedClause>(*C));
|
|
|
|
break;
|
|
|
|
case OMPC_lastprivate:
|
|
|
|
SeparateIter = true;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
// Not handled yet
|
|
|
|
;
|
|
|
|
}
|
2014-05-22 16:54:05 +08:00
|
|
|
}
|
2014-10-01 14:03:56 +08:00
|
|
|
|
2015-04-10 12:50:10 +08:00
|
|
|
// Emit inits for the linear variables.
|
|
|
|
for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
|
|
|
|
for (auto Init : C->inits()) {
|
|
|
|
auto *D = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
|
|
|
|
CGF.EmitVarDecl(*D);
|
|
|
|
}
|
2015-03-21 18:12:56 +08:00
|
|
|
}
|
|
|
|
|
2015-04-10 12:50:10 +08:00
|
|
|
// Emit the loop iteration variable.
|
|
|
|
const Expr *IVExpr = S.getIterationVariable();
|
|
|
|
const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
|
|
|
|
CGF.EmitVarDecl(*IVDecl);
|
|
|
|
CGF.EmitIgnoredExpr(S.getInit());
|
|
|
|
|
|
|
|
// Emit the iterations count variable.
|
|
|
|
// If it is not a variable, Sema decided to calculate iterations count on
|
|
|
|
// each
|
|
|
|
// iteration (e.g., it is foldable into a constant).
|
|
|
|
if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
|
|
|
|
CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
|
|
|
|
// Emit calculation of the iterations count.
|
|
|
|
CGF.EmitIgnoredExpr(S.getCalcLastIteration());
|
|
|
|
}
|
2014-10-01 14:03:56 +08:00
|
|
|
|
2015-04-10 12:50:10 +08:00
|
|
|
// Emit the linear steps for the linear clauses.
|
|
|
|
// If a step is not constant, it is pre-calculated before the loop.
|
|
|
|
for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
|
|
|
|
if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
|
|
|
|
if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
|
|
|
|
CGF.EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
|
|
|
|
// Emit calculation of the linear step.
|
|
|
|
CGF.EmitIgnoredExpr(CS);
|
|
|
|
}
|
|
|
|
}
|
2014-10-01 14:03:56 +08:00
|
|
|
|
2015-04-10 12:50:10 +08:00
|
|
|
if (SeparateIter) {
|
|
|
|
// Emit: if (LastIteration > 0) - begin.
|
|
|
|
RegionCounter Cnt = CGF.getPGORegionCounter(&S);
|
|
|
|
auto ThenBlock = CGF.createBasicBlock("simd.if.then");
|
|
|
|
auto ContBlock = CGF.createBasicBlock("simd.if.end");
|
|
|
|
CGF.EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock,
|
|
|
|
Cnt.getCount());
|
|
|
|
CGF.EmitBlock(ThenBlock);
|
|
|
|
Cnt.beginRegion(CGF.Builder);
|
|
|
|
// Emit 'then' code.
|
|
|
|
{
|
|
|
|
OMPPrivateScope LoopScope(CGF);
|
|
|
|
EmitPrivateLoopCounters(CGF, LoopScope, S.counters());
|
|
|
|
EmitPrivateLinearVars(CGF, S, LoopScope);
|
|
|
|
CGF.EmitOMPPrivateClause(S, LoopScope);
|
|
|
|
(void)LoopScope.Privatize();
|
|
|
|
CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
|
|
|
|
S.getCond(/*SeparateIter=*/true), S.getInc(),
|
|
|
|
[&S](CodeGenFunction &CGF) {
|
|
|
|
CGF.EmitOMPLoopBody(S);
|
|
|
|
CGF.EmitStopPoint(&S);
|
|
|
|
});
|
|
|
|
CGF.EmitOMPLoopBody(S, /* SeparateIter */ true);
|
2015-03-21 18:12:56 +08:00
|
|
|
}
|
2015-04-10 12:50:10 +08:00
|
|
|
CGF.EmitOMPSimdFinal(S);
|
|
|
|
// Emit: if (LastIteration != 0) - end.
|
|
|
|
CGF.EmitBranch(ContBlock);
|
|
|
|
CGF.EmitBlock(ContBlock, true);
|
|
|
|
} else {
|
|
|
|
{
|
|
|
|
OMPPrivateScope LoopScope(CGF);
|
|
|
|
EmitPrivateLoopCounters(CGF, LoopScope, S.counters());
|
|
|
|
EmitPrivateLinearVars(CGF, S, LoopScope);
|
|
|
|
CGF.EmitOMPPrivateClause(S, LoopScope);
|
|
|
|
(void)LoopScope.Privatize();
|
|
|
|
CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
|
|
|
|
S.getCond(/*SeparateIter=*/false), S.getInc(),
|
|
|
|
[&S](CodeGenFunction &CGF) {
|
|
|
|
CGF.EmitOMPLoopBody(S);
|
|
|
|
CGF.EmitStopPoint(&S);
|
|
|
|
});
|
|
|
|
}
|
|
|
|
CGF.EmitOMPSimdFinal(S);
|
2014-10-01 14:03:56 +08:00
|
|
|
}
|
2015-04-10 12:50:10 +08:00
|
|
|
};
|
|
|
|
CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
|
2014-05-22 16:54:05 +08:00
|
|
|
}
|
|
|
|
|
2015-01-22 16:49:35 +08:00
|
|
|
void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
|
|
|
|
const OMPLoopDirective &S,
|
|
|
|
OMPPrivateScope &LoopScope,
|
|
|
|
llvm::Value *LB, llvm::Value *UB,
|
|
|
|
llvm::Value *ST, llvm::Value *IL,
|
|
|
|
llvm::Value *Chunk) {
|
|
|
|
auto &RT = CGM.getOpenMPRuntime();
|
2015-03-12 21:37:50 +08:00
|
|
|
|
|
|
|
// Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
|
|
|
|
const bool Dynamic = RT.isDynamic(ScheduleKind);
|
|
|
|
|
2015-01-22 16:49:35 +08:00
|
|
|
assert(!RT.isStaticNonchunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
|
|
|
|
"static non-chunked schedule does not need outer loop");
|
|
|
|
|
|
|
|
// Emit outer loop.
|
|
|
|
//
|
|
|
|
// OpenMP [2.7.1, Loop Construct, Description, table 2-1]
|
2015-03-12 21:37:50 +08:00
|
|
|
// When schedule(dynamic,chunk_size) is specified, the iterations are
|
|
|
|
// distributed to threads in the team in chunks as the threads request them.
|
|
|
|
// Each thread executes a chunk of iterations, then requests another chunk,
|
|
|
|
// until no chunks remain to be distributed. Each chunk contains chunk_size
|
|
|
|
// iterations, except for the last chunk to be distributed, which may have
|
|
|
|
// fewer iterations. When no chunk_size is specified, it defaults to 1.
|
|
|
|
//
|
|
|
|
// When schedule(guided,chunk_size) is specified, the iterations are assigned
|
|
|
|
// to threads in the team in chunks as the executing threads request them.
|
|
|
|
// Each thread executes a chunk of iterations, then requests another chunk,
|
|
|
|
// until no chunks remain to be assigned. For a chunk_size of 1, the size of
|
|
|
|
// each chunk is proportional to the number of unassigned iterations divided
|
|
|
|
// by the number of threads in the team, decreasing to 1. For a chunk_size
|
|
|
|
// with value k (greater than 1), the size of each chunk is determined in the
|
|
|
|
// same way, with the restriction that the chunks do not contain fewer than k
|
|
|
|
// iterations (except for the last chunk to be assigned, which may have fewer
|
|
|
|
// than k iterations).
|
|
|
|
//
|
|
|
|
// When schedule(auto) is specified, the decision regarding scheduling is
|
|
|
|
// delegated to the compiler and/or runtime system. The programmer gives the
|
|
|
|
// implementation the freedom to choose any possible mapping of iterations to
|
|
|
|
// threads in the team.
|
|
|
|
//
|
|
|
|
// When schedule(runtime) is specified, the decision regarding scheduling is
|
|
|
|
// deferred until run time, and the schedule and chunk size are taken from the
|
|
|
|
// run-sched-var ICV. If the ICV is set to auto, the schedule is
|
|
|
|
// implementation defined
|
|
|
|
//
|
|
|
|
// while(__kmpc_dispatch_next(&LB, &UB)) {
|
|
|
|
// idx = LB;
|
|
|
|
// while (idx <= UB) { BODY; ++idx; } // inner loop
|
|
|
|
// }
|
|
|
|
//
|
|
|
|
// OpenMP [2.7.1, Loop Construct, Description, table 2-1]
|
2015-01-22 16:49:35 +08:00
|
|
|
// When schedule(static, chunk_size) is specified, iterations are divided into
|
|
|
|
// chunks of size chunk_size, and the chunks are assigned to the threads in
|
|
|
|
// the team in a round-robin fashion in the order of the thread number.
|
|
|
|
//
|
|
|
|
// while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
|
|
|
|
// while (idx <= UB) { BODY; ++idx; } // inner loop
|
|
|
|
// LB = LB + ST;
|
|
|
|
// UB = UB + ST;
|
|
|
|
// }
|
|
|
|
//
|
2015-03-12 21:37:50 +08:00
|
|
|
|
2015-01-22 16:49:35 +08:00
|
|
|
const Expr *IVExpr = S.getIterationVariable();
|
|
|
|
const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
|
|
|
|
const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
|
|
|
|
|
2015-03-12 21:37:50 +08:00
|
|
|
RT.emitForInit(
|
|
|
|
*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, IL, LB,
|
|
|
|
(Dynamic ? EmitAnyExpr(S.getLastIteration()).getScalarVal() : UB), ST,
|
|
|
|
Chunk);
|
|
|
|
|
2015-01-22 16:49:35 +08:00
|
|
|
auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
|
|
|
|
|
|
|
|
// Start the loop with a block that tests the condition.
|
|
|
|
auto CondBlock = createBasicBlock("omp.dispatch.cond");
|
|
|
|
EmitBlock(CondBlock);
|
|
|
|
LoopStack.push(CondBlock);
|
|
|
|
|
|
|
|
llvm::Value *BoolCondVal = nullptr;
|
2015-03-12 21:37:50 +08:00
|
|
|
if (!Dynamic) {
|
|
|
|
// UB = min(UB, GlobalUB)
|
|
|
|
EmitIgnoredExpr(S.getEnsureUpperBound());
|
|
|
|
// IV = LB
|
|
|
|
EmitIgnoredExpr(S.getInit());
|
|
|
|
// IV < UB
|
|
|
|
BoolCondVal = EvaluateExprAsBool(S.getCond(false));
|
|
|
|
} else {
|
|
|
|
BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned,
|
|
|
|
IL, LB, UB, ST);
|
|
|
|
}
|
2015-01-22 16:49:35 +08:00
|
|
|
|
|
|
|
// If there are any cleanups between here and the loop-exit scope,
|
|
|
|
// create a block to stage a loop exit along.
|
|
|
|
auto ExitBlock = LoopExit.getBlock();
|
|
|
|
if (LoopScope.requiresCleanups())
|
|
|
|
ExitBlock = createBasicBlock("omp.dispatch.cleanup");
|
|
|
|
|
|
|
|
auto LoopBody = createBasicBlock("omp.dispatch.body");
|
|
|
|
Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
|
|
|
|
if (ExitBlock != LoopExit.getBlock()) {
|
|
|
|
EmitBlock(ExitBlock);
|
|
|
|
EmitBranchThroughCleanup(LoopExit);
|
|
|
|
}
|
|
|
|
EmitBlock(LoopBody);
|
|
|
|
|
2015-03-12 21:37:50 +08:00
|
|
|
// Emit "IV = LB" (in case of static schedule, we have already calculated new
|
|
|
|
// LB for loop condition and emitted it above).
|
|
|
|
if (Dynamic)
|
|
|
|
EmitIgnoredExpr(S.getInit());
|
|
|
|
|
2015-01-22 16:49:35 +08:00
|
|
|
// Create a block for the increment.
|
|
|
|
auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
|
|
|
|
BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
|
|
|
|
|
[OPENMP] Initial codegen for 'omp sections' and 'omp section' directives.
If only one section is found in the sections region, it is emitted just like single region.
Otherwise it is emitted as a static non-chunked loop.
#pragma omp sections
{
#pragma omp section
{1}
...
#pragma omp section
{n}
}
is translated to something like
i32 <iter_var>
i32 <last_iter> = 0
i32 <lower_bound> = 0
i32 <upper_bound> = n-1
i32 <stride> = 1
call void @__kmpc_for_static_init_4(<loc>, i32 <gtid>, i32 34/*static non-chunked*/, i32* <last_iter>, i32* <lower_bound>, i32* <upper_bound>, i32* <stride>, i32 1/*increment always 1*/, i32 1/*chunk always 1*/)
<upper_bound> = min(<upper_bound>, n-1)
<iter_var> = <lb>
check:
br <iter_var> <= <upper_bound>, label cont, label exit
continue:
switch (IV) {
case 0:
{1};
break;
...
case <NumSection> - 1:
{n};
break;
}
++<iter_var>
br label check
exit:
call void @__kmpc_for_static_fini(<loc>, i32 <gtid>)
Differential Revision: http://reviews.llvm.org/D8244
llvm-svn: 232021
2015-03-12 16:53:29 +08:00
|
|
|
EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
|
2015-04-10 12:50:10 +08:00
|
|
|
S.getCond(/*SeparateIter=*/false), S.getInc(),
|
|
|
|
[&S](CodeGenFunction &CGF) {
|
|
|
|
CGF.EmitOMPLoopBody(S);
|
|
|
|
CGF.EmitStopPoint(&S);
|
[OPENMP] Initial codegen for 'omp sections' and 'omp section' directives.
If only one section is found in the sections region, it is emitted just like single region.
Otherwise it is emitted as a static non-chunked loop.
#pragma omp sections
{
#pragma omp section
{1}
...
#pragma omp section
{n}
}
is translated to something like
i32 <iter_var>
i32 <last_iter> = 0
i32 <lower_bound> = 0
i32 <upper_bound> = n-1
i32 <stride> = 1
call void @__kmpc_for_static_init_4(<loc>, i32 <gtid>, i32 34/*static non-chunked*/, i32* <last_iter>, i32* <lower_bound>, i32* <upper_bound>, i32* <stride>, i32 1/*increment always 1*/, i32 1/*chunk always 1*/)
<upper_bound> = min(<upper_bound>, n-1)
<iter_var> = <lb>
check:
br <iter_var> <= <upper_bound>, label cont, label exit
continue:
switch (IV) {
case 0:
{1};
break;
...
case <NumSection> - 1:
{n};
break;
}
++<iter_var>
br label check
exit:
call void @__kmpc_for_static_fini(<loc>, i32 <gtid>)
Differential Revision: http://reviews.llvm.org/D8244
llvm-svn: 232021
2015-03-12 16:53:29 +08:00
|
|
|
});
|
2015-01-22 16:49:35 +08:00
|
|
|
|
|
|
|
EmitBlock(Continue.getBlock());
|
|
|
|
BreakContinueStack.pop_back();
|
2015-03-12 21:37:50 +08:00
|
|
|
if (!Dynamic) {
|
|
|
|
// Emit "LB = LB + Stride", "UB = UB + Stride".
|
|
|
|
EmitIgnoredExpr(S.getNextLowerBound());
|
|
|
|
EmitIgnoredExpr(S.getNextUpperBound());
|
|
|
|
}
|
2015-01-22 16:49:35 +08:00
|
|
|
|
|
|
|
EmitBranch(CondBlock);
|
|
|
|
LoopStack.pop();
|
|
|
|
// Emit the fall-through block.
|
|
|
|
EmitBlock(LoopExit.getBlock());
|
|
|
|
|
|
|
|
// Tell the runtime we are done.
|
2015-03-12 21:37:50 +08:00
|
|
|
// FIXME: Also call fini for ordered loops with dynamic scheduling.
|
|
|
|
if (!Dynamic)
|
|
|
|
RT.emitForFinish(*this, S.getLocStart(), ScheduleKind);
|
2015-01-22 16:49:35 +08:00
|
|
|
}
|
|
|
|
|
2014-12-15 15:07:06 +08:00
|
|
|
/// \brief Emit a helper variable and return corresponding lvalue.
|
|
|
|
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
|
|
|
|
const DeclRefExpr *Helper) {
|
|
|
|
auto VDecl = cast<VarDecl>(Helper->getDecl());
|
|
|
|
CGF.EmitVarDecl(*VDecl);
|
|
|
|
return CGF.EmitLValue(Helper);
|
|
|
|
}
|
|
|
|
|
|
|
|
void CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
|
|
|
|
// Emit the loop iteration variable.
|
|
|
|
auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
|
|
|
|
auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
|
|
|
|
EmitVarDecl(*IVDecl);
|
|
|
|
|
|
|
|
// Emit the iterations count variable.
|
|
|
|
// If it is not a variable, Sema decided to calculate iterations count on each
|
|
|
|
// iteration (e.g., it is foldable into a constant).
|
|
|
|
if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
|
|
|
|
EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
|
|
|
|
// Emit calculation of the iterations count.
|
|
|
|
EmitIgnoredExpr(S.getCalcLastIteration());
|
|
|
|
}
|
|
|
|
|
|
|
|
auto &RT = CGM.getOpenMPRuntime();
|
|
|
|
|
|
|
|
// Check pre-condition.
|
|
|
|
{
|
|
|
|
// Skip the entire loop if we don't meet the precondition.
|
|
|
|
RegionCounter Cnt = getPGORegionCounter(&S);
|
|
|
|
auto ThenBlock = createBasicBlock("omp.precond.then");
|
|
|
|
auto ContBlock = createBasicBlock("omp.precond.end");
|
|
|
|
EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount());
|
|
|
|
EmitBlock(ThenBlock);
|
|
|
|
Cnt.beginRegion(Builder);
|
|
|
|
// Emit 'then' code.
|
|
|
|
{
|
|
|
|
// Emit helper vars inits.
|
|
|
|
LValue LB =
|
|
|
|
EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
|
|
|
|
LValue UB =
|
|
|
|
EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
|
|
|
|
LValue ST =
|
|
|
|
EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
|
|
|
|
LValue IL =
|
|
|
|
EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
|
|
|
|
|
|
|
|
OMPPrivateScope LoopScope(*this);
|
|
|
|
EmitPrivateLoopCounters(*this, LoopScope, S.counters());
|
2015-03-16 15:14:41 +08:00
|
|
|
(void)LoopScope.Privatize();
|
2014-12-15 15:07:06 +08:00
|
|
|
|
|
|
|
// Detect the loop schedule kind and chunk.
|
|
|
|
auto ScheduleKind = OMPC_SCHEDULE_unknown;
|
|
|
|
llvm::Value *Chunk = nullptr;
|
|
|
|
if (auto C = cast_or_null<OMPScheduleClause>(
|
|
|
|
S.getSingleClause(OMPC_schedule))) {
|
|
|
|
ScheduleKind = C->getScheduleKind();
|
|
|
|
if (auto Ch = C->getChunkSize()) {
|
|
|
|
Chunk = EmitScalarExpr(Ch);
|
|
|
|
Chunk = EmitScalarConversion(Chunk, Ch->getType(),
|
|
|
|
S.getIterationVariable()->getType());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
|
|
|
|
const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
|
|
|
|
if (RT.isStaticNonchunked(ScheduleKind,
|
|
|
|
/* Chunked */ Chunk != nullptr)) {
|
|
|
|
// OpenMP [2.7.1, Loop Construct, Description, table 2-1]
|
|
|
|
// When no chunk_size is specified, the iteration space is divided into
|
|
|
|
// chunks that are approximately equal in size, and at most one chunk is
|
|
|
|
// distributed to each thread. Note that the size of the chunks is
|
|
|
|
// unspecified in this case.
|
2015-02-25 16:32:46 +08:00
|
|
|
RT.emitForInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
|
|
|
|
IL.getAddress(), LB.getAddress(), UB.getAddress(),
|
|
|
|
ST.getAddress());
|
2014-12-15 15:07:06 +08:00
|
|
|
// UB = min(UB, GlobalUB);
|
|
|
|
EmitIgnoredExpr(S.getEnsureUpperBound());
|
|
|
|
// IV = LB;
|
|
|
|
EmitIgnoredExpr(S.getInit());
|
|
|
|
// while (idx <= UB) { BODY; ++idx; }
|
[OPENMP] Initial codegen for 'omp sections' and 'omp section' directives.
If only one section is found in the sections region, it is emitted just like single region.
Otherwise it is emitted as a static non-chunked loop.
#pragma omp sections
{
#pragma omp section
{1}
...
#pragma omp section
{n}
}
is translated to something like
i32 <iter_var>
i32 <last_iter> = 0
i32 <lower_bound> = 0
i32 <upper_bound> = n-1
i32 <stride> = 1
call void @__kmpc_for_static_init_4(<loc>, i32 <gtid>, i32 34/*static non-chunked*/, i32* <last_iter>, i32* <lower_bound>, i32* <upper_bound>, i32* <stride>, i32 1/*increment always 1*/, i32 1/*chunk always 1*/)
<upper_bound> = min(<upper_bound>, n-1)
<iter_var> = <lb>
check:
br <iter_var> <= <upper_bound>, label cont, label exit
continue:
switch (IV) {
case 0:
{1};
break;
...
case <NumSection> - 1:
{n};
break;
}
++<iter_var>
br label check
exit:
call void @__kmpc_for_static_fini(<loc>, i32 <gtid>)
Differential Revision: http://reviews.llvm.org/D8244
llvm-svn: 232021
2015-03-12 16:53:29 +08:00
|
|
|
EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
|
|
|
|
S.getCond(/*SeparateIter=*/false), S.getInc(),
|
2015-04-10 12:50:10 +08:00
|
|
|
[&S](CodeGenFunction &CGF) {
|
|
|
|
CGF.EmitOMPLoopBody(S);
|
|
|
|
CGF.EmitStopPoint(&S);
|
[OPENMP] Initial codegen for 'omp sections' and 'omp section' directives.
If only one section is found in the sections region, it is emitted just like single region.
Otherwise it is emitted as a static non-chunked loop.
#pragma omp sections
{
#pragma omp section
{1}
...
#pragma omp section
{n}
}
is translated to something like
i32 <iter_var>
i32 <last_iter> = 0
i32 <lower_bound> = 0
i32 <upper_bound> = n-1
i32 <stride> = 1
call void @__kmpc_for_static_init_4(<loc>, i32 <gtid>, i32 34/*static non-chunked*/, i32* <last_iter>, i32* <lower_bound>, i32* <upper_bound>, i32* <stride>, i32 1/*increment always 1*/, i32 1/*chunk always 1*/)
<upper_bound> = min(<upper_bound>, n-1)
<iter_var> = <lb>
check:
br <iter_var> <= <upper_bound>, label cont, label exit
continue:
switch (IV) {
case 0:
{1};
break;
...
case <NumSection> - 1:
{n};
break;
}
++<iter_var>
br label check
exit:
call void @__kmpc_for_static_fini(<loc>, i32 <gtid>)
Differential Revision: http://reviews.llvm.org/D8244
llvm-svn: 232021
2015-03-12 16:53:29 +08:00
|
|
|
});
|
2014-12-15 15:07:06 +08:00
|
|
|
// Tell the runtime we are done.
|
2015-02-25 16:32:46 +08:00
|
|
|
RT.emitForFinish(*this, S.getLocStart(), ScheduleKind);
|
2015-01-22 16:49:35 +08:00
|
|
|
} else {
|
|
|
|
// Emit the outer loop, which requests its work chunk [LB..UB] from
|
|
|
|
// runtime and runs the inner loop to process it.
|
|
|
|
EmitOMPForOuterLoop(ScheduleKind, S, LoopScope, LB.getAddress(),
|
|
|
|
UB.getAddress(), ST.getAddress(), IL.getAddress(),
|
|
|
|
Chunk);
|
|
|
|
}
|
2014-12-15 15:07:06 +08:00
|
|
|
}
|
|
|
|
// We're now done with the loop, so jump to the continuation block.
|
|
|
|
EmitBranch(ContBlock);
|
|
|
|
EmitBlock(ContBlock, true);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
|
2015-04-10 12:50:10 +08:00
|
|
|
LexicalScope Scope(*this, S.getSourceRange());
|
|
|
|
auto &&CodeGen =
|
|
|
|
[&S](CodeGenFunction &CGF) { CGF.EmitOMPWorksharingLoop(S); };
|
|
|
|
CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
|
2014-12-15 15:07:06 +08:00
|
|
|
|
|
|
|
// Emit an implicit barrier at the end.
|
2015-03-30 12:30:22 +08:00
|
|
|
if (!S.getSingleClause(OMPC_nowait)) {
|
|
|
|
CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
|
|
|
|
}
|
2014-06-18 12:14:57 +08:00
|
|
|
}
|
2014-06-25 19:44:49 +08:00
|
|
|
|
2014-09-18 13:12:34 +08:00
|
|
|
/// Placeholder for '#pragma omp for simd': code generation for this
/// directive is not implemented, so reaching this function is a fatal error.
void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &) {
  llvm_unreachable("CodeGen for 'omp for simd' is not supported yet.");
}
|
|
|
|
|
[OPENMP] Initial codegen for 'omp sections' and 'omp section' directives.
If only one section is found in the sections region, it is emitted just like a single region.
Otherwise it is emitted as a static non-chunked loop.
#pragma omp sections
{
#pragma omp section
{1}
...
#pragma omp section
{n}
}
is translated to something like
i32 <iter_var>
i32 <last_iter> = 0
i32 <lower_bound> = 0
i32 <upper_bound> = n-1
i32 <stride> = 1
call void @__kmpc_for_static_init_4(<loc>, i32 <gtid>, i32 34/*static non-chunked*/, i32* <last_iter>, i32* <lower_bound>, i32* <upper_bound>, i32* <stride>, i32 1/*increment always 1*/, i32 1/*chunk always 1*/)
<upper_bound> = min(<upper_bound>, n-1)
<iter_var> = <lb>
check:
br <iter_var> <= <upper_bound>, label cont, label exit
continue:
switch (IV) {
case 0:
{1};
break;
...
case <NumSection> - 1:
{n};
break;
}
++<iter_var>
br label check
exit:
call void @__kmpc_for_static_fini(<loc>, i32 <gtid>)
Differential Revision: http://reviews.llvm.org/D8244
llvm-svn: 232021
2015-03-12 16:53:29 +08:00
|
|
|
/// Create a memory temporary of type \p Ty named \p Name and return it as a
/// naturally aligned lvalue. If \p Init is non-null it is stored into the
/// temporary as its scalar initializer.
static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
                                const Twine &Name,
                                llvm::Value *Init = nullptr) {
  auto *TempAddr = CGF.CreateMemTemp(Ty, Name);
  auto Result = CGF.MakeNaturalAlignAddrLValue(TempAddr, Ty);
  if (Init != nullptr)
    CGF.EmitScalarInit(Init, Result);
  return Result;
}
|
|
|
|
|
2015-04-14 11:29:22 +08:00
|
|
|
/// Emit the region associated with the sections-like directive \p S.
///
/// If the captured statement is a compound statement with more than one child,
/// the children become the cases of a switch driven by a statically scheduled
/// loop over [0, NumChildren - 1], and OMPD_sections is returned. Otherwise
/// the captured statement is emitted as a 'single' region and OMPD_single is
/// returned. The result tells the caller which form was emitted.
static OpenMPDirectiveKind emitSections(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S) {
  auto *Body = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
  auto *Sections = dyn_cast<CompoundStmt>(Body);

  if (!Sections || Sections->size() <= 1) {
    // No loop is needed here; emit the whole captured statement as a single
    // region.
    auto &&SingleGen = [Body](CodeGenFunction &CGF) {
      CGF.EmitStmt(Body);
      CGF.EnsureInsertPoint();
    };
    CGF.CGM.getOpenMPRuntime().emitSingleRegion(CGF, SingleGen,
                                                S.getLocStart(), llvm::None,
                                                llvm::None, llvm::None,
                                                llvm::None);
    return OMPD_single;
  }

  auto &&SectionsGen = [&S, Sections](CodeGenFunction &CGF) {
    auto &Ctx = CGF.CGM.getContext();
    auto Int32Ty = Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);

    // Helper variables: lower bound, upper bound, stride and last-iteration
    // flag, each with its initial value.
    LValue LowerBound = createSectionLVal(
        CGF, Int32Ty, ".omp.sections.lb.", CGF.Builder.getInt32(0));
    auto *LastSectionIdx = CGF.Builder.getInt32(Sections->size() - 1);
    LValue UpperBound = createSectionLVal(CGF, Int32Ty, ".omp.sections.ub.",
                                          LastSectionIdx);
    LValue Stride = createSectionLVal(CGF, Int32Ty, ".omp.sections.st.",
                                      CGF.Builder.getInt32(1));
    LValue IsLast = createSectionLVal(CGF, Int32Ty, ".omp.sections.il.",
                                      CGF.Builder.getInt32(0));
    // Loop counter (left uninitialized here; assigned from the lower bound
    // below).
    LValue Counter = createSectionLVal(CGF, Int32Ty, ".omp.sections.iv.");

    // Opaque expressions standing for the counter and the upper bound, so
    // that the loop condition and increment can be built as AST nodes.
    OpaqueValueExpr CounterRef(S.getLocStart(), Int32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping CounterMapping(CGF, &CounterRef,
                                                       Counter);
    OpaqueValueExpr UpperBoundRef(S.getLocStart(), Int32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping UpperBoundMapping(CGF, &UpperBoundRef,
                                                          UpperBound);

    // Loop condition: counter <= upper bound.
    BinaryOperator LoopCond(&CounterRef, &UpperBoundRef, BO_LE, Ctx.BoolTy,
                            VK_RValue, OK_Ordinary, S.getLocStart(),
                            /*fpContractable=*/false);
    // Loop increment: ++counter.
    UnaryOperator LoopInc(&CounterRef, UO_PreInc, Int32Ty, VK_RValue,
                          OK_Ordinary, S.getLocStart());

    auto EmitSwitchOverSections = [Sections, &S,
                                   &Counter](CodeGenFunction &CGF) {
      // Emit every child of the compound statement as one case of
      //   switch (counter) {
      //     case 0:
      //       <child[0]>;
      //       break;
      //     ...
      //     case <NumChildren> - 1:
      //       <child[<NumChildren> - 1]>;
      //       break;
      //   }
      //   .omp.sections.exit:
      auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      auto *CounterVal =
          CGF.EmitLoadOfLValue(Counter, S.getLocStart()).getScalarVal();
      auto *Switch =
          CGF.Builder.CreateSwitch(CounterVal, ExitBB, Sections->size());
      unsigned CaseIdx = 0;
      for (auto Child = Sections->children(); Child; ++Child, ++CaseIdx) {
        auto *CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        Switch->addCase(CGF.Builder.getInt32(CaseIdx), CaseBB);
        CGF.EmitStmt(*Child);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    // Set up the static non-chunked loop.
    CGF.CGM.getOpenMPRuntime().emitForInit(
        CGF, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32,
        /*IVSigned=*/true, IsLast.getAddress(), LowerBound.getAddress(),
        UpperBound.getAddress(), Stride.getAddress());

    // upper bound = min(upper bound, index of the last section);
    auto *CurrentUB = CGF.EmitLoadOfScalar(UpperBound, S.getLocStart());
    auto *IsBelowLast = CGF.Builder.CreateICmpSLT(CurrentUB, LastSectionIdx);
    auto *ClampedUB =
        CGF.Builder.CreateSelect(IsBelowLast, CurrentUB, LastSectionIdx);
    CGF.EmitStoreOfScalar(ClampedUB, UpperBound);

    // counter = lower bound;
    auto *InitialCounter = CGF.EmitLoadOfScalar(LowerBound, S.getLocStart());
    CGF.EmitStoreOfScalar(InitialCounter, Counter);

    // while (counter <= upper bound) { BODY; ++counter; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &LoopCond, &LoopInc,
                         EmitSwitchOverSections);

    // Notify the runtime that the loop is finished.
    CGF.CGM.getOpenMPRuntime().emitForFinish(CGF, S.getLocStart(),
                                             OMPC_SCHEDULE_static);
  };

  CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, SectionsGen);
  return OMPD_sections;
}
|
[OPENMP] Initial codegen for 'omp sections' and 'omp section' directives.
If only one section is found in the sections region, it is emitted just like single region.
Otherwise it is emitted as a static non-chunked loop.
#pragma omp sections
{
#pragma omp section
{1}
...
#pragma omp section
{n}
}
is translated to something like
i32 <iter_var>
i32 <last_iter> = 0
i32 <lower_bound> = 0
i32 <upper_bound> = n-1
i32 <stride> = 1
call void @__kmpc_for_static_init_4(<loc>, i32 <gtid>, i32 34/*static non-chunked*/, i32* <last_iter>, i32* <lower_bound>, i32* <upper_bound>, i32* <stride>, i32 1/*increment always 1*/, i32 1/*chunk always 1*/)
<upper_bound> = min(<upper_bound>, n-1)
<iter_var> = <lb>
check:
br <iter_var> <= <upper_bound>, label cont, label exit
continue:
switch (IV) {
case 0:
{1};
break;
...
case <NumSection> - 1:
{n};
break;
}
++<iter_var>
br label check
exit:
call void @__kmpc_for_static_fini(<loc>, i32 <gtid>)
Differential Revision: http://reviews.llvm.org/D8244
llvm-svn: 232021
2015-03-12 16:53:29 +08:00
|
|
|
|
2015-04-14 11:29:22 +08:00
|
|
|
void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
|
|
|
|
LexicalScope Scope(*this, S.getSourceRange());
|
|
|
|
OpenMPDirectiveKind EmittedAs = emitSections(*this, S);
|
[OPENMP] Initial codegen for 'omp sections' and 'omp section' directives.
If only one section is found in the sections region, it is emitted just like single region.
Otherwise it is emitted as a static non-chunked loop.
#pragma omp sections
{
#pragma omp section
{1}
...
#pragma omp section
{n}
}
is translated to something like
i32 <iter_var>
i32 <last_iter> = 0
i32 <lower_bound> = 0
i32 <upper_bound> = n-1
i32 <stride> = 1
call void @__kmpc_for_static_init_4(<loc>, i32 <gtid>, i32 34/*static non-chunked*/, i32* <last_iter>, i32* <lower_bound>, i32* <upper_bound>, i32* <stride>, i32 1/*increment always 1*/, i32 1/*chunk always 1*/)
<upper_bound> = min(<upper_bound>, n-1)
<iter_var> = <lb>
check:
br <iter_var> <= <upper_bound>, label cont, label exit
continue:
switch (IV) {
case 0:
{1};
break;
...
case <NumSection> - 1:
{n};
break;
}
++<iter_var>
br label check
exit:
call void @__kmpc_for_static_fini(<loc>, i32 <gtid>)
Differential Revision: http://reviews.llvm.org/D8244
llvm-svn: 232021
2015-03-12 16:53:29 +08:00
|
|
|
// Emit an implicit barrier at the end.
|
2015-03-30 12:30:22 +08:00
|
|
|
if (!S.getSingleClause(OMPC_nowait)) {
|
2015-04-14 11:29:22 +08:00
|
|
|
CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), EmittedAs);
|
2015-03-30 12:30:22 +08:00
|
|
|
}
|
2014-06-25 19:44:49 +08:00
|
|
|
}
|
|
|
|
|
[OPENMP] Initial codegen for 'omp sections' and 'omp section' directives.
If only one section is found in the sections region, it is emitted just like single region.
Otherwise it is emitted as a static non-chunked loop.
#pragma omp sections
{
#pragma omp section
{1}
...
#pragma omp section
{n}
}
is translated to something like
i32 <iter_var>
i32 <last_iter> = 0
i32 <lower_bound> = 0
i32 <upper_bound> = n-1
i32 <stride> = 1
call void @__kmpc_for_static_init_4(<loc>, i32 <gtid>, i32 34/*static non-chunked*/, i32* <last_iter>, i32* <lower_bound>, i32* <upper_bound>, i32* <stride>, i32 1/*increment always 1*/, i32 1/*chunk always 1*/)
<upper_bound> = min(<upper_bound>, n-1)
<iter_var> = <lb>
check:
br <iter_var> <= <upper_bound>, label cont, label exit
continue:
switch (IV) {
case 0:
{1};
break;
...
case <NumSection> - 1:
{n};
break;
}
++<iter_var>
br label check
exit:
call void @__kmpc_for_static_fini(<loc>, i32 <gtid>)
Differential Revision: http://reviews.llvm.org/D8244
llvm-svn: 232021
2015-03-12 16:53:29 +08:00
|
|
|
void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
|
2015-04-10 12:50:10 +08:00
|
|
|
LexicalScope Scope(*this, S.getSourceRange());
|
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
|
|
|
|
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
|
|
|
|
CGF.EnsureInsertPoint();
|
|
|
|
};
|
|
|
|
CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
|
2014-06-26 16:21:58 +08:00
|
|
|
}
|
|
|
|
|
2015-02-05 14:35:41 +08:00
|
|
|
void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
|
2015-03-23 14:18:07 +08:00
|
|
|
llvm::SmallVector<const Expr *, 8> CopyprivateVars;
|
|
|
|
llvm::SmallVector<const Expr *, 8> SrcExprs;
|
|
|
|
llvm::SmallVector<const Expr *, 8> DstExprs;
|
|
|
|
llvm::SmallVector<const Expr *, 8> AssignmentOps;
|
2015-04-10 12:50:10 +08:00
|
|
|
// Check if there are any 'copyprivate' clauses associated with this
|
|
|
|
// 'single'
|
2015-03-23 14:18:07 +08:00
|
|
|
// construct.
|
|
|
|
auto CopyprivateFilter = [](const OMPClause *C) -> bool {
|
|
|
|
return C->getClauseKind() == OMPC_copyprivate;
|
|
|
|
};
|
|
|
|
// Build a list of copyprivate variables along with helper expressions
|
|
|
|
// (<source>, <destination>, <destination>=<source> expressions)
|
|
|
|
typedef OMPExecutableDirective::filtered_clause_iterator<decltype(
|
|
|
|
CopyprivateFilter)> CopyprivateIter;
|
|
|
|
for (CopyprivateIter I(S.clauses(), CopyprivateFilter); I; ++I) {
|
|
|
|
auto *C = cast<OMPCopyprivateClause>(*I);
|
|
|
|
CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
|
|
|
|
SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
|
|
|
|
DstExprs.append(C->destination_exprs().begin(),
|
|
|
|
C->destination_exprs().end());
|
|
|
|
AssignmentOps.append(C->assignment_ops().begin(),
|
|
|
|
C->assignment_ops().end());
|
|
|
|
}
|
2015-04-10 12:50:10 +08:00
|
|
|
LexicalScope Scope(*this, S.getSourceRange());
|
2015-03-23 14:18:07 +08:00
|
|
|
// Emit code for 'single' region along with 'copyprivate' clauses
|
2015-04-10 12:50:10 +08:00
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
|
|
|
|
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
|
|
|
|
CGF.EnsureInsertPoint();
|
|
|
|
};
|
|
|
|
CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
|
|
|
|
CopyprivateVars, SrcExprs, DstExprs,
|
|
|
|
AssignmentOps);
|
2015-03-23 14:18:07 +08:00
|
|
|
// Emit an implicit barrier at the end.
|
2015-03-30 12:30:22 +08:00
|
|
|
if (!S.getSingleClause(OMPC_nowait)) {
|
|
|
|
CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_single);
|
|
|
|
}
|
2014-06-26 20:05:45 +08:00
|
|
|
}
|
|
|
|
|
2014-12-04 15:23:53 +08:00
|
|
|
void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
|
2015-04-10 12:50:10 +08:00
|
|
|
LexicalScope Scope(*this, S.getSourceRange());
|
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
|
|
|
|
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
|
|
|
|
CGF.EnsureInsertPoint();
|
|
|
|
};
|
|
|
|
CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
|
2014-07-17 16:54:58 +08:00
|
|
|
}
|
|
|
|
|
2014-09-22 18:01:53 +08:00
|
|
|
void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
|
2015-04-10 12:50:10 +08:00
|
|
|
LexicalScope Scope(*this, S.getSourceRange());
|
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
|
|
|
|
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
|
|
|
|
CGF.EnsureInsertPoint();
|
|
|
|
};
|
2015-02-25 16:32:46 +08:00
|
|
|
CGM.getOpenMPRuntime().emitCriticalRegion(
|
2015-04-10 12:50:10 +08:00
|
|
|
*this, S.getDirectiveName().getAsString(), CodeGen, S.getLocStart());
|
2014-07-21 17:42:05 +08:00
|
|
|
}
|
|
|
|
|
2015-04-13 13:28:11 +08:00
|
|
|
void CodeGenFunction::EmitOMPParallelForDirective(
|
|
|
|
const OMPParallelForDirective &S) {
|
|
|
|
// Emit directive as a combined directive that consists of two implicit
|
|
|
|
// directives: 'parallel' with 'for' directive.
|
|
|
|
LexicalScope Scope(*this, S.getSourceRange());
|
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
|
|
|
|
CGF.EmitOMPWorksharingLoop(S);
|
|
|
|
// Emit implicit barrier at the end of parallel region, but this barrier
|
|
|
|
// is at the end of 'for' directive, so emit it as the implicit barrier for
|
|
|
|
// this 'for' directive.
|
|
|
|
CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
|
|
|
|
OMPD_parallel);
|
|
|
|
};
|
|
|
|
emitCommonOMPParallelDirective(*this, S, CodeGen);
|
2014-07-07 21:01:15 +08:00
|
|
|
}
|
|
|
|
|
2014-09-23 17:33:00 +08:00
|
|
|
void CodeGenFunction::EmitOMPParallelForSimdDirective(
|
|
|
|
const OMPParallelForSimdDirective &) {
|
|
|
|
llvm_unreachable("CodeGen for 'omp parallel for simd' is not supported yet.");
|
|
|
|
}
|
|
|
|
|
2014-07-08 16:12:03 +08:00
|
|
|
void CodeGenFunction::EmitOMPParallelSectionsDirective(
|
2015-04-14 11:29:22 +08:00
|
|
|
const OMPParallelSectionsDirective &S) {
|
|
|
|
// Emit directive as a combined directive that consists of two implicit
|
|
|
|
// directives: 'parallel' with 'sections' directive.
|
|
|
|
LexicalScope Scope(*this, S.getSourceRange());
|
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
|
|
|
|
(void)emitSections(CGF, S);
|
|
|
|
// Emit implicit barrier at the end of parallel region.
|
|
|
|
CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
|
|
|
|
OMPD_parallel);
|
|
|
|
};
|
|
|
|
emitCommonOMPParallelDirective(*this, S, CodeGen);
|
2014-07-08 16:12:03 +08:00
|
|
|
}
|
|
|
|
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
|
|
|
|
// Emit outlined function for task construct.
|
2015-04-10 12:50:10 +08:00
|
|
|
LexicalScope Scope(*this, S.getSourceRange());
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emmitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
|
|
|
|
auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
|
|
|
|
auto *I = CS->getCapturedDecl()->param_begin();
|
2015-04-10 12:50:10 +08:00
|
|
|
auto *PartId = std::next(I);
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emmitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
// The first function argument for tasks is a thread id, the second one is a
|
|
|
|
// part id (0 for tied tasks, >=0 for untied task).
|
2015-04-10 12:50:10 +08:00
|
|
|
auto &&CodeGen = [PartId, &S](CodeGenFunction &CGF) {
|
|
|
|
if (*PartId) {
|
|
|
|
// TODO: emit code for untied tasks.
|
|
|
|
}
|
|
|
|
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
|
|
|
|
};
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emmitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
auto OutlinedFn =
|
2015-04-10 12:50:10 +08:00
|
|
|
CGM.getOpenMPRuntime().emitTaskOutlinedFunction(S, *I, CodeGen);
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emmitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
// Check if we should emit tied or untied task.
|
|
|
|
bool Tied = !S.getSingleClause(OMPC_untied);
|
|
|
|
// Check if the task is final
|
|
|
|
llvm::PointerIntPair<llvm::Value *, 1, bool> Final;
|
|
|
|
if (auto *Clause = S.getSingleClause(OMPC_final)) {
|
|
|
|
// If the condition constant folds and can be elided, try to avoid emitting
|
|
|
|
// the condition and the dead arm of the if/else.
|
|
|
|
auto *Cond = cast<OMPFinalClause>(Clause)->getCondition();
|
|
|
|
bool CondConstant;
|
|
|
|
if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
|
|
|
|
Final.setInt(CondConstant);
|
|
|
|
else
|
|
|
|
Final.setPointer(EvaluateExprAsBool(Cond));
|
|
|
|
} else {
|
|
|
|
// By default the task is not final.
|
|
|
|
Final.setInt(/*IntVal=*/false);
|
|
|
|
}
|
|
|
|
auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
|
|
|
|
CGM.getOpenMPRuntime().emitTaskCall(*this, S.getLocStart(), Tied, Final,
|
|
|
|
OutlinedFn, SharedsTy, CapturedStruct);
|
2014-07-11 19:25:16 +08:00
|
|
|
}
|
|
|
|
|
2015-02-05 13:57:51 +08:00
|
|
|
void CodeGenFunction::EmitOMPTaskyieldDirective(
|
|
|
|
const OMPTaskyieldDirective &S) {
|
2015-02-25 16:32:46 +08:00
|
|
|
CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
|
2014-07-18 15:47:19 +08:00
|
|
|
}
|
|
|
|
|
2014-12-05 12:09:23 +08:00
|
|
|
void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
|
2015-03-30 12:30:22 +08:00
|
|
|
CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
|
2014-07-18 17:11:51 +08:00
|
|
|
}
|
|
|
|
|
2014-07-18 18:17:07 +08:00
|
|
|
/// Code generation for 'omp taskwait' is not implemented; reaching this
/// function is reported through llvm_unreachable.
void CodeGenFunction::EmitOMPTaskwaitDirective(
    const OMPTaskwaitDirective &) {
  llvm_unreachable("CodeGen for 'omp taskwait' is not supported yet.");
}
|
|
|
|
|
2014-11-20 12:34:54 +08:00
|
|
|
void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
|
2015-02-25 16:32:46 +08:00
|
|
|
CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
|
|
|
|
if (auto C = S.getSingleClause(/*K*/ OMPC_flush)) {
|
|
|
|
auto FlushClause = cast<OMPFlushClause>(C);
|
|
|
|
return llvm::makeArrayRef(FlushClause->varlist_begin(),
|
|
|
|
FlushClause->varlist_end());
|
|
|
|
}
|
|
|
|
return llvm::None;
|
|
|
|
}(), S.getLocStart());
|
2014-07-21 19:26:11 +08:00
|
|
|
}
|
|
|
|
|
2014-07-22 14:45:04 +08:00
|
|
|
/// Code generation for 'omp ordered' is not implemented; reaching this
/// function is reported through llvm_unreachable.
void CodeGenFunction::EmitOMPOrderedDirective(
    const OMPOrderedDirective &) {
  llvm_unreachable("CodeGen for 'omp ordered' is not supported yet.");
}
|
|
|
|
|
2015-01-22 14:17:56 +08:00
|
|
|
/// Convert \p Val, a scalar or complex rvalue of type \p SrcType, to a scalar
/// value of type \p DestType. \p DestType must have scalar evaluation kind and
/// \p Val must not be an aggregate.
static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
                                         QualType SrcType, QualType DestType) {
  assert(CGF.hasScalarEvaluationKind(DestType) &&
         "DestType must have scalar evaluation kind.");
  assert(!Val.isAggregate() && "Must be a scalar or complex.");

  if (Val.isScalar())
    return CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType);
  return CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
                                           DestType);
}
|
|
|
|
|
|
|
|
/// Convert \p Val, a scalar or complex rvalue of type \p SrcType, to a complex
/// value of type \p DestType, which must have complex evaluation kind.
///
/// A scalar input becomes the first component and the second component is the
/// null value of the converted scalar's type. A complex input has each
/// component converted to the destination element type.
static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
                      QualType DestType) {
  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
         "DestType must have complex evaluation kind.");
  auto DestElemTy = DestType->castAs<ComplexType>()->getElementType();

  if (Val.isScalar()) {
    // Convert the input element to the element type of the complex.
    auto *Converted =
        CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestElemTy);
    return CodeGenFunction::ComplexPairTy(
        Converted, llvm::Constant::getNullValue(Converted->getType()));
  }

  assert(Val.isComplex() && "Must be a scalar or complex.");
  auto SrcElemTy = SrcType->castAs<ComplexType>()->getElementType();
  // Convert the first component before the second so that the conversions are
  // emitted in that order.
  llvm::Value *First = CGF.EmitScalarConversion(Val.getComplexVal().first,
                                                SrcElemTy, DestElemTy);
  llvm::Value *Second = CGF.EmitScalarConversion(Val.getComplexVal().second,
                                                 SrcElemTy, DestElemTy);
  return CodeGenFunction::ComplexPairTy(First, Second);
}
|
|
|
|
|
|
|
|
/// Emit 'v = x;' for an 'omp atomic read' construct.
///
/// \p X is loaded atomically, or with a plain load when it is a global
/// register lvalue, with sequentially consistent ordering if \p IsSeqCst and
/// monotonic ordering otherwise. The loaded value is converted to the type of
/// \p V and stored there. Both \p X and \p V must be lvalues.
static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");

  const llvm::AtomicOrdering Ordering =
      IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic;
  const QualType XTy = X->getType();
  const QualType VTy = V->getType();

  LValue XAddr = CGF.EmitLValue(X);
  LValue VAddr = CGF.EmitLValue(V);
  RValue Loaded =
      XAddr.isGlobalReg()
          ? CGF.EmitLoadOfLValue(XAddr, Loc)
          : CGF.EmitAtomicLoad(XAddr, Loc, Ordering, XAddr.isVolatile());

  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);

  switch (CGF.getEvaluationKind(VTy)) {
  case TEK_Scalar: {
    auto *ScalarVal = convertToScalarValue(CGF, Loaded, XTy, VTy);
    CGF.EmitStoreOfScalar(ScalarVal, VAddr);
    break;
  }
  case TEK_Complex: {
    auto ComplexVal = convertToComplexValue(CGF, Loaded, XTy, VTy);
    CGF.EmitStoreOfComplex(ComplexVal, VAddr, /*isInit=*/false);
    break;
  }
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}
|
|
|
|
|
2015-02-27 14:33:30 +08:00
|
|
|
/// Emit 'x = expr;' for an 'omp atomic write' construct.
///
/// The value of \p E is stored to \p X atomically, or through the global
/// register store when \p X is a global register lvalue, with sequentially
/// consistent ordering if \p IsSeqCst and monotonic ordering otherwise.
/// \p X must be an lvalue.
static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                   const Expr *X, const Expr *E,
                                   SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");

  const llvm::AtomicOrdering Ordering =
      IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic;

  LValue Target = CGF.EmitLValue(X);
  RValue NewValue = CGF.EmitAnyExpr(E);
  if (!Target.isGlobalReg()) {
    CGF.EmitAtomicStore(NewValue, Target, Ordering, Target.isVolatile(),
                        /*IsInit=*/false);
  } else {
    CGF.EmitStoreThroughGlobalRegLValue(NewValue, Target);
  }

  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
|
|
|
|
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit code for the reduction clause. The following code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of private variable: they have private copies, but their initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
/// Try to lower an OpenMP atomic update of \p X to a single 'atomicrmw'
/// instruction.
///
/// \param CGF Code generation state used to build the instruction.
/// \param X Lvalue that is updated.
/// \param Update Value that is combined with 'x'.
/// \param BO Binary operation of the update expression.
/// \param AO Atomic ordering passed to the emitted instruction.
/// \param IsXLHSInRHSPart True if 'x' is the left operand of \p BO, false if
/// it is the right operand.
/// \returns true if an 'atomicrmw' was emitted; false if the update cannot be
/// expressed that way (nothing has been emitted in that case).
bool emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, RValue Update,
                      BinaryOperatorKind BO, llvm::AtomicOrdering AO,
                      bool IsXLHSInRHSPart) {
  // 'atomicrmw' is usable only if 'x' and 'update' are integer values, the
  // lvalue for 'x' is simple, and atomics are allowed for the given type on
  // the target platform.
  if (BO == BO_Comma || !Update.isScalar())
    return false;
  auto *UpdateVal = Update.getScalarVal();
  if (!UpdateVal->getType()->isIntegerTy() || !X.isSimple())
    return false;
  auto *XElemTy = X.getAddress()->getType()->getPointerElementType();
  // A constant update is cast to the type of 'x' further down; any other
  // update must already have exactly that type.
  if (!isa<llvm::ConstantInt>(UpdateVal) && UpdateVal->getType() != XElemTy)
    return false;
  auto &Context = CGF.CGM.getContext();
  if (!Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return false;

  const bool IsSigned = X.getType()->hasSignedIntegerRepresentation();

  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = llvm::AtomicRMWInst::Add;
    break;
  case BO_Sub:
    // Only 'x - expr' maps onto 'atomicrmw sub'; 'expr - x' does not.
    if (!IsXLHSInRHSPart)
      return false;
    RMWOp = llvm::AtomicRMWInst::Sub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  case BO_LT:
    // 'x' on the left of '<' selects min, on the right selects max; the
    // unsigned variants are used when 'x' has no signed representation.
    if (IsSigned)
      RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                              : llvm::AtomicRMWInst::Max;
    else
      RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                              : llvm::AtomicRMWInst::UMax;
    break;
  case BO_GT:
    // Mirror image of BO_LT.
    if (IsSigned)
      RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                              : llvm::AtomicRMWInst::Min;
    else
      RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                              : llvm::AtomicRMWInst::UMin;
    break;
  // Valid update operations that have no 'atomicrmw' counterpart here.
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    return false;
  // Operations that are not expected in an atomic update expression.
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_Assign:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }

  // Bring a constant update to the type of 'x', using the signedness of 'x'.
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal))
    UpdateVal = CGF.Builder.CreateIntCast(IC, XElemTy, IsSigned);
  CGF.Builder.CreateAtomicRMW(RMWOp, X.getAddress(), UpdateVal, AO);
  return true;
}
|
|
|
|
|
|
|
|
/// Emit an atomic update of \p X.
///
/// Update expressions are allowed to have the following forms:
///   x binop= expr;     -> xrval + expr;
///   x++, ++x           -> xrval + 1;
///   x--, --x           -> xrval - 1;
///   x = x binop expr;  -> xrval binop expr
///   x = expr Op x;     -> expr binop xrval;
///
/// \param X Lvalue that is updated.
/// \param E Value of 'expr'.
/// \param BO Binary operation of the update.
/// \param IsXLHSInRHSPart True if 'x' is the left operand of \p BO.
/// \param AO Atomic ordering of the update.
/// \param Loc Source location used when loading 'x'.
/// \param CommonGen Callback that produces the new value of 'x' from its
/// current value; used whenever a single 'atomicrmw' is not emitted.
void CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
    llvm::AtomicOrdering AO, SourceLocation Loc,
    const llvm::function_ref<RValue(RValue)> &CommonGen) {
  // First choice: a single 'atomicrmw' instruction.
  if (emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart))
    return;

  if (!X.isGlobalReg()) {
    // Perform compare-and-swap procedure.
    EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
    return;
  }

  // Global register: emit the update expression 'xrval' binop 'expr' or
  // 'expr' binop 'xrval' as a plain load followed by a store.
  RValue CurrentX = EmitLoadOfLValue(X, Loc);
  EmitStoreThroughLValue(CommonGen(CurrentX), X);
}
|
|
|
|
|
|
|
|
/// Emit code for the 'update' form of 'omp atomic'.
///
/// Update expressions are allowed to have the following forms:
///   x binop= expr;     -> xrval + expr;
///   x++, ++x           -> xrval + 1;
///   x--, --x           -> xrval - 1;
///   x = x binop expr;  -> xrval binop expr
///   x = expr Op x;     -> expr binop xrval;
///
/// \param CGF Code generation state.
/// \param IsSeqCst True if the construct has a seq_cst clause.
/// \param X Expression for 'x'; must be an lvalue.
/// \param E Expression for 'expr'.
/// \param UE Update expression; after stripping implicit casts it must be a
/// binary operator whose operands are opaque value expressions.
/// \param IsXLHSInRHSPart True if 'x' is the left operand of \p UE.
/// \param Loc Source location of the construct.
static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                    const Expr *X, const Expr *E,
                                    const Expr *UE, bool IsXLHSInRHSPart,
                                    SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");

  // Pick apart the update expression: one operand stands for the current
  // value of 'x', the other one for the value of 'expr'.
  auto *UpdateOp = cast<BinaryOperator>(UE->IgnoreImpCasts());
  auto *LeftOpaque = cast<OpaqueValueExpr>(UpdateOp->getLHS()->IgnoreImpCasts());
  auto *RightOpaque =
      cast<OpaqueValueExpr>(UpdateOp->getRHS()->IgnoreImpCasts());
  auto *XPlaceholder = IsXLHSInRHSPart ? LeftOpaque : RightOpaque;
  auto *EPlaceholder = IsXLHSInRHSPart ? RightOpaque : LeftOpaque;
  auto Ordering = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic;

  // 'x' is emitted before 'expr'.
  LValue XAddr = CGF.EmitLValue(X);
  RValue EVal = CGF.EmitAnyExpr(E);

  // Generator used when the update is not emitted as a single instruction:
  // binds both placeholders and evaluates the whole update expression.
  auto UpdateGen =
      [&CGF, UE, EVal, XPlaceholder, EPlaceholder](RValue XVal) -> RValue {
        CodeGenFunction::OpaqueValueMapping BindExpr(CGF, EPlaceholder, EVal);
        CodeGenFunction::OpaqueValueMapping BindX(CGF, XPlaceholder, XVal);
        return CGF.EmitAnyExpr(UE);
      };
  CGF.EmitOMPAtomicSimpleUpdateExpr(XAddr, EVal, UpdateOp->getOpcode(),
                                    IsXLHSInRHSPart, Ordering, Loc, UpdateGen);

  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (!IsSeqCst)
    return;
  CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
|
|
|
|
|
2015-01-22 14:17:56 +08:00
|
|
|
/// Dispatch code generation for 'omp atomic' on the kind of its clause.
///
/// \param CGF Code generation state.
/// \param Kind Clause kind selecting the form of the atomic construct.
/// \param IsSeqCst True if the construct has a seq_cst clause.
/// \param X Expression for 'x'.
/// \param V Expression for 'v'; used by the 'read' form only.
/// \param E Expression for 'expr'; used by the 'write' and 'update' forms.
/// \param UE Update expression; used by the 'update' form only.
/// \param IsXLHSInRHSPart True if 'x' is the left operand of \p UE.
/// \param Loc Source location of the construct.
static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              bool IsSeqCst, const Expr *X, const Expr *V,
                              const Expr *E, const Expr *UE,
                              bool IsXLHSInRHSPart, SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
    return;
  case OMPC_write:
    EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
    return;
  // OMPC_unknown is handled exactly like an explicit 'update' clause.
  case OMPC_unknown:
  case OMPC_update:
    EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
    return;
  case OMPC_capture:
    llvm_unreachable("CodeGen for 'omp atomic clause' is not supported yet.");
  // Every remaining clause kind is listed explicitly, without a 'default'
  // label.
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_safelen:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_mergeable:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}
|
|
|
|
|
|
|
|
void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
|
|
|
|
bool IsSeqCst = S.getSingleClause(/*K=*/OMPC_seq_cst);
|
|
|
|
OpenMPClauseKind Kind = OMPC_unknown;
|
|
|
|
for (auto *C : S.clauses()) {
|
|
|
|
// Find first clause (skip seq_cst clause, if it is first).
|
|
|
|
if (C->getClauseKind() != OMPC_seq_cst) {
|
|
|
|
Kind = C->getClauseKind();
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2015-03-11 12:48:56 +08:00
|
|
|
|
|
|
|
const auto *CS =
|
|
|
|
S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
|
|
|
|
if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS))
|
|
|
|
enterFullExpression(EWC);
|
|
|
|
|
2015-04-10 12:50:10 +08:00
|
|
|
LexicalScope Scope(*this, S.getSourceRange());
|
|
|
|
auto &&CodeGen = [&S, Kind, IsSeqCst](CodeGenFunction &CGF) {
|
|
|
|
EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.getX(), S.getV(), S.getExpr(),
|
|
|
|
S.getUpdateExpr(), S.isXLHSInRHSPart(), S.getLocStart());
|
|
|
|
};
|
|
|
|
CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
|
2014-07-22 18:10:35 +08:00
|
|
|
}
|
|
|
|
|
2014-09-19 16:19:49 +08:00
|
|
|
/// Placeholder for '#pragma omp target': code generation is not implemented,
/// so reaching this function is treated as unreachable. The directive
/// argument is unused.
void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &) {
  llvm_unreachable("CodeGen for 'omp target' is not supported yet.");
}
|
|
|
|
|
2014-10-09 12:18:56 +08:00
|
|
|
/// Placeholder for '#pragma omp teams': code generation is not implemented,
/// so reaching this function is treated as unreachable. The directive
/// argument is unused.
void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) {
  llvm_unreachable("CodeGen for 'omp teams' is not supported yet.");
}
|
|
|
|
|