llvm-project/clang/lib/CodeGen/CGStmtOpenMP.cpp

706 lines
27 KiB
C++
Raw Normal View History

//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "TargetInfo.h"
using namespace clang;
using namespace CodeGen;
namespace {
/// \brief RAII for emitting code of CapturedStmt without function outlining.
///
/// On construction the object remembers the CodeGenFunction's current
/// CapturedStmtInfo and CurCodeDecl, then installs a freshly allocated
/// CGInlinedOpenMPRegionInfo and the captured declaration of \a S. On
/// destruction it deletes the installed info object and restores both saved
/// values.
///
/// The object owns the heap-allocated info it installs, so it must never be
/// copied: a copy would delete the same info object a second time.
class InlinedOpenMPRegion {
  CodeGenFunction &CGF;
  /// CapturedStmtInfo that was active before this region was entered.
  CodeGenFunction::CGCapturedStmtInfo *PrevCapturedStmtInfo;
  /// CurCodeDecl that was active before this region was entered.
  const Decl *StoredCurCodeDecl;

  /// \brief A class to emit CapturedStmt construct as inlined statement without
  /// generating a function for outlined code.
  class CGInlinedOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
  public:
    CGInlinedOpenMPRegionInfo() : CGCapturedStmtInfo() {}
  };

  // Non-copyable: the destructor deletes the installed info object, so a
  // second instance sharing it would double-delete.
  InlinedOpenMPRegion(const InlinedOpenMPRegion &) = delete;
  InlinedOpenMPRegion &operator=(const InlinedOpenMPRegion &) = delete;

public:
  /// \param CGF Function whose captured-statement state is swapped.
  /// \param S Statement that must be a CapturedStmt (enforced by cast<>).
  InlinedOpenMPRegion(CodeGenFunction &CGF, const Stmt *S)
      : CGF(CGF), PrevCapturedStmtInfo(CGF.CapturedStmtInfo),
        StoredCurCodeDecl(CGF.CurCodeDecl) {
    CGF.CurCodeDecl = cast<CapturedStmt>(S)->getCapturedDecl();
    CGF.CapturedStmtInfo = new CGInlinedOpenMPRegionInfo();
  }

  ~InlinedOpenMPRegion() {
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = PrevCapturedStmtInfo;
    CGF.CurCodeDecl = StoredCurCodeDecl;
  }
};
} // namespace
//===----------------------------------------------------------------------===//
// OpenMP Directive Emission
//===----------------------------------------------------------------------===//
[OPENMP] Codegen for 'if' clause in 'parallel' directive. Adds codegen for 'if' clause. Currently only for 'if' clause used with the 'parallel' directive. If condition evaluates to true, the code executes parallel version of the code by calling __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/), where loc - debug location, 1 - number of additional parameters after "microtask" argument, microtask - is outlined finction for the code associated with the 'parallel' directive, captured_struct - list of variables captured in this outlined function. If condition evaluates to false, the code executes serial version of the code by executing the following code: global_thread_id.addr = alloca i32 store i32 global_thread_id, global_thread_id.addr zero.addr = alloca i32 store i32 0, zero.addr kmpc_serialized_parallel(loc, global_thread_id); microtask(global_thread_id.addr, zero.addr, captured_struct/*context*/); kmpc_end_serialized_parallel(loc, global_thread_id); Where loc - debug location, global_thread_id - global thread id, returned by __kmpc_global_thread_num() call or passed as a first parameter in microtask() call, global_thread_id.addr - address of the variable, where stored global_thread_id value, zero.addr - implicit bound thread id (should be set to 0 for serial call), microtask() and captured_struct are the same as in parallel call. Also this patch checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false). Differential Revision: http://reviews.llvm.org/D4716 llvm-svn: 219597
2014-10-13 14:02:40 +08:00
/// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
/// function. Here is the logic:
/// if (Cond) {
/// CodeGen(true);
/// } else {
/// CodeGen(false);
/// }
static void EmitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
const std::function<void(bool)> &CodeGen) {
CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
// If the condition constant folds and can be elided, try to avoid emitting
// the condition and the dead arm of the if/else.
bool CondConstant;
if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
CodeGen(CondConstant);
return;
}
// Otherwise, the condition did not fold, or we couldn't elide it. Just
// emit the conditional branch.
auto ThenBlock = CGF.createBasicBlock(/*name*/ "omp_if.then");
auto ElseBlock = CGF.createBasicBlock(/*name*/ "omp_if.else");
auto ContBlock = CGF.createBasicBlock(/*name*/ "omp_if.end");
CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount*/ 0);
// Emit the 'then' code.
CGF.EmitBlock(ThenBlock);
CodeGen(/*ThenBlock*/ true);
CGF.EmitBranch(ContBlock);
// Emit the 'else' code if present.
{
// There is no need to emit line number for unconditional branch.
SuppressDebugLocation SDL(CGF.Builder);
[OPENMP] Codegen for 'if' clause in 'parallel' directive. Adds codegen for 'if' clause. Currently only for 'if' clause used with the 'parallel' directive. If condition evaluates to true, the code executes parallel version of the code by calling __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/), where loc - debug location, 1 - number of additional parameters after "microtask" argument, microtask - is outlined finction for the code associated with the 'parallel' directive, captured_struct - list of variables captured in this outlined function. If condition evaluates to false, the code executes serial version of the code by executing the following code: global_thread_id.addr = alloca i32 store i32 global_thread_id, global_thread_id.addr zero.addr = alloca i32 store i32 0, zero.addr kmpc_serialized_parallel(loc, global_thread_id); microtask(global_thread_id.addr, zero.addr, captured_struct/*context*/); kmpc_end_serialized_parallel(loc, global_thread_id); Where loc - debug location, global_thread_id - global thread id, returned by __kmpc_global_thread_num() call or passed as a first parameter in microtask() call, global_thread_id.addr - address of the variable, where stored global_thread_id value, zero.addr - implicit bound thread id (should be set to 0 for serial call), microtask() and captured_struct are the same as in parallel call. Also this patch checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false). Differential Revision: http://reviews.llvm.org/D4716 llvm-svn: 219597
2014-10-13 14:02:40 +08:00
CGF.EmitBlock(ElseBlock);
}
CodeGen(/*ThenBlock*/ false);
{
// There is no need to emit line number for unconditional branch.
SuppressDebugLocation SDL(CGF.Builder);
[OPENMP] Codegen for 'if' clause in 'parallel' directive. Adds codegen for 'if' clause. Currently only for 'if' clause used with the 'parallel' directive. If condition evaluates to true, the code executes parallel version of the code by calling __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/), where loc - debug location, 1 - number of additional parameters after "microtask" argument, microtask - is outlined finction for the code associated with the 'parallel' directive, captured_struct - list of variables captured in this outlined function. If condition evaluates to false, the code executes serial version of the code by executing the following code: global_thread_id.addr = alloca i32 store i32 global_thread_id, global_thread_id.addr zero.addr = alloca i32 store i32 0, zero.addr kmpc_serialized_parallel(loc, global_thread_id); microtask(global_thread_id.addr, zero.addr, captured_struct/*context*/); kmpc_end_serialized_parallel(loc, global_thread_id); Where loc - debug location, global_thread_id - global thread id, returned by __kmpc_global_thread_num() call or passed as a first parameter in microtask() call, global_thread_id.addr - address of the variable, where stored global_thread_id value, zero.addr - implicit bound thread id (should be set to 0 for serial call), microtask() and captured_struct are the same as in parallel call. Also this patch checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false). Differential Revision: http://reviews.llvm.org/D4716 llvm-svn: 219597
2014-10-13 14:02:40 +08:00
CGF.EmitBranch(ContBlock);
}
// Emit the continuation block for code after the if.
CGF.EmitBlock(ContBlock, /*IsFinished*/ true);
}
/// \brief Emits initialization of the private copy at \a PrivateAddr from the
/// original aggregate at \a OriginalAddr.
///
/// When \a AssignExpr is not a CXXConstructExpr, or is a trivial initializer,
/// the whole object is copied with one aggregate assignment. Otherwise
/// \a OriginalType is treated as an array type and \a AssignExpr is emitted
/// once per element, walking from the last element back to the first.
///
/// \param OriginalAddr LValue of the original (source) variable.
/// \param PrivateAddr Address of the private (destination) copy.
/// \param AssignExpr Initializer emitted for the copy (or for each element).
/// \param OriginalType Type of the original variable; only consulted on the
/// element-by-element path.
/// \param VDInit Declaration whose LocalDeclMap entry is pointed at the
/// current source element while \a AssignExpr is emitted.
void CodeGenFunction::EmitOMPAggregateAssign(LValue OriginalAddr,
llvm::Value *PrivateAddr,
const Expr *AssignExpr,
QualType OriginalType,
const VarDecl *VDInit) {
EmitBlock(createBasicBlock(".omp.assign.begin."));
if (!isa<CXXConstructExpr>(AssignExpr) || isTrivialInitializer(AssignExpr)) {
// Perform simple memcpy.
EmitAggregateAssign(PrivateAddr, OriginalAddr.getAddress(),
AssignExpr->getType());
} else {
// Perform element-by-element initialization.
QualType ElementTy;
auto SrcBegin = OriginalAddr.getAddress();
auto DestBegin = PrivateAddr;
auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
// Both lengths are computed from the same array type.
auto SrcNumElements = emitArrayLength(ArrayTy, ElementTy, SrcBegin);
auto DestNumElements = emitArrayLength(ArrayTy, ElementTy, DestBegin);
auto SrcEnd = Builder.CreateGEP(SrcBegin, SrcNumElements);
auto DestEnd = Builder.CreateGEP(DestBegin, DestNumElements);
// The loop body itself is shaped like a do-while loop; the zero-element
// case is filtered out up front by the 'isempty' branch below.
auto BodyBB = createBasicBlock("omp.arraycpy.body");
auto DoneBB = createBasicBlock("omp.arraycpy.done");
auto IsEmpty =
Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
// Enter the loop body, making that address the current address.
auto EntryBB = Builder.GetInsertBlock();
EmitBlock(BodyBB);
// The PHIs hold the one-past pointers; on entry they are the array ends.
auto SrcElementPast = Builder.CreatePHI(SrcBegin->getType(), 2,
"omp.arraycpy.srcElementPast");
SrcElementPast->addIncoming(SrcEnd, EntryBB);
auto DestElementPast = Builder.CreatePHI(DestBegin->getType(), 2,
"omp.arraycpy.destElementPast");
DestElementPast->addIncoming(DestEnd, EntryBB);
// Shift the address back by one element.
auto NegativeOne = llvm::ConstantInt::get(SizeTy, -1, true);
auto DestElement = Builder.CreateGEP(DestElementPast, NegativeOne,
"omp.arraycpy.dest.element");
auto SrcElement = Builder.CreateGEP(SrcElementPast, NegativeOne,
"omp.arraycpy.src.element");
{
// Create RunCleanScope to cleanup possible temps.
CodeGenFunction::RunCleanupsScope Init(*this);
// Emit initialization for single element.
// VDInit is mapped to the current source element only for the duration of
// this emission and unmapped again right after it.
LocalDeclMap[VDInit] = SrcElement;
EmitAnyExprToMem(AssignExpr, DestElement,
AssignExpr->getType().getQualifiers(),
/*IsInitializer*/ false);
LocalDeclMap.erase(VDInit);
}
// Check whether we've reached the end.
auto Done =
Builder.CreateICmpEQ(DestElement, DestBegin, "omp.arraycpy.done");
Builder.CreateCondBr(Done, DoneBB, BodyBB);
// Back-edge values: the element just processed becomes the next 'past'
// pointer. The incoming block is whatever block the body ended in.
DestElementPast->addIncoming(DestElement, Builder.GetInsertBlock());
SrcElementPast->addIncoming(SrcElement, Builder.GetInsertBlock());
// Done.
EmitBlock(DoneBB, true);
}
EmitBlock(createBasicBlock(".omp.assign.end."));
}
/// \brief Registers private copies for every variable listed in the
/// 'firstprivate' clauses of \a D.
///
/// For each variable a generator is added to \a PrivateScope. When the clause
/// carries an init expression for the variable, the private copy is allocated
/// and initialized from the captured original through EmitOMPAggregateAssign;
/// otherwise the private declaration is emitted directly with EmitDecl.
///
/// \param D Directive whose clauses are scanned.
/// \param PrivateScope Scope that receives the original-to-private mappings.
void CodeGenFunction::EmitOMPFirstprivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  auto PrivateFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_firstprivate;
  };
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(PrivateFilter)>
           I(D.clauses(), PrivateFilter); I; ++I) {
    auto *C = cast<OMPFirstprivateClause>(*I);
    // The variable list and the init list are walked in lock-step with the
    // private copies.
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      bool IsRegistered;
      if (*InitsRef != nullptr) {
        // Emit VarDecl with copy init for arrays.
        auto *FD = CapturedStmtInfo->lookup(OrigVD);
        LValue Base = MakeNaturalAlignAddrLValue(
            CapturedStmtInfo->getContextValue(),
            getContext().getTagDeclType(FD->getParent()));
        auto OriginalAddr = EmitLValueForField(Base, FD);
        auto VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
          auto Emission = EmitAutoVarAlloca(*VD);
          // Emit initialization of aggregate firstprivate vars.
          EmitOMPAggregateAssign(OriginalAddr, Emission.getAllocatedAddress(),
                                 VD->getInit(), (*IRef)->getType(), VDInit);
          EmitAutoVarCleanups(Emission);
          return Emission.getAllocatedAddress();
        });
      } else {
        IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
          // Emit private VarDecl with copy init.
          EmitDecl(*VD);
          return GetAddrOfLocalVar(VD);
        });
      }
      assert(IsRegistered && "firstprivate var already registered as private");
      // Silence the warning about unused variable.
      (void)IsRegistered;
      ++IRef;
      ++InitsRef;
    }
  }
}
/// \brief Registers private copies for every variable listed in the 'private'
/// clauses of \a D.
///
/// For each variable a generator is added to \a PrivateScope that emits the
/// private declaration with EmitDecl and returns its local address.
///
/// \param D Directive whose clauses are scanned.
/// \param PrivateScope Scope that receives the original-to-private mappings.
void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  auto PrivateFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_private;
  };
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(PrivateFilter)>
           I(D.clauses(), PrivateFilter); I; ++I) {
    auto *C = cast<OMPPrivateClause>(*I);
    // The variable list is walked in lock-step with the private copies.
    auto IRef = C->varlist_begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      bool IsRegistered =
          PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
            // Emit the declaration of the private copy.
            EmitDecl(*VD);
            return GetAddrOfLocalVar(VD);
          });
      assert(IsRegistered && "private var already registered as private");
      // Silence the warning about unused variable.
      (void)IsRegistered;
      ++IRef;
    }
  }
}
/// \brief Emits the runtime call that launches the parallel region of \a S.
///
/// If the directive carries a 'num_threads' clause, its expression is
/// evaluated first (inside its own cleanups scope) and handed to the OpenMP
/// runtime before the parallel call itself is emitted.
///
/// \param CGF Function the code is emitted into.
/// \param S The 'parallel' directive.
/// \param OutlinedFn Outlined function for the region.
/// \param CapturedStruct Captured-variables argument for \a OutlinedFn.
static void EmitOMPParallelCall(CodeGenFunction &CGF,
                                const OMPParallelDirective &S,
                                llvm::Value *OutlinedFn,
                                llvm::Value *CapturedStruct) {
  auto &Runtime = CGF.CGM.getOpenMPRuntime();

  if (auto Clause = S.getSingleClause(/*K*/ OMPC_num_threads)) {
    CodeGenFunction::RunCleanupsScope ClauseScope(CGF);
    auto ThreadsClause = cast<OMPNumThreadsClause>(Clause);
    auto ThreadCount = CGF.EmitScalarExpr(ThreadsClause->getNumThreads(),
                                          /*IgnoreResultAssign*/ true);
    Runtime.EmitOMPNumThreadsClause(CGF, ThreadCount,
                                    ThreadsClause->getLocStart());
  }

  Runtime.EmitOMPParallelCall(CGF, S.getLocStart(), OutlinedFn,
                              CapturedStruct);
}
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
auto OutlinedFn = CGM.getOpenMPRuntime().EmitOpenMPOutlinedFunction(
S, *CS->getCapturedDecl()->param_begin());
[OPENMP] Codegen for 'if' clause in 'parallel' directive. Adds codegen for 'if' clause. Currently only for 'if' clause used with the 'parallel' directive. If condition evaluates to true, the code executes parallel version of the code by calling __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/), where loc - debug location, 1 - number of additional parameters after "microtask" argument, microtask - is outlined finction for the code associated with the 'parallel' directive, captured_struct - list of variables captured in this outlined function. If condition evaluates to false, the code executes serial version of the code by executing the following code: global_thread_id.addr = alloca i32 store i32 global_thread_id, global_thread_id.addr zero.addr = alloca i32 store i32 0, zero.addr kmpc_serialized_parallel(loc, global_thread_id); microtask(global_thread_id.addr, zero.addr, captured_struct/*context*/); kmpc_end_serialized_parallel(loc, global_thread_id); Where loc - debug location, global_thread_id - global thread id, returned by __kmpc_global_thread_num() call or passed as a first parameter in microtask() call, global_thread_id.addr - address of the variable, where stored global_thread_id value, zero.addr - implicit bound thread id (should be set to 0 for serial call), microtask() and captured_struct are the same as in parallel call. Also this patch checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false). Differential Revision: http://reviews.llvm.org/D4716 llvm-svn: 219597
2014-10-13 14:02:40 +08:00
if (auto C = S.getSingleClause(/*K*/ OMPC_if)) {
auto Cond = cast<OMPIfClause>(C)->getCondition();
EmitOMPIfClause(*this, Cond, [&](bool ThenBlock) {
if (ThenBlock)
EmitOMPParallelCall(*this, S, OutlinedFn, CapturedStruct);
[OPENMP] Codegen for 'if' clause in 'parallel' directive. Adds codegen for 'if' clause. Currently only for 'if' clause used with the 'parallel' directive. If condition evaluates to true, the code executes parallel version of the code by calling __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/), where loc - debug location, 1 - number of additional parameters after "microtask" argument, microtask - is outlined finction for the code associated with the 'parallel' directive, captured_struct - list of variables captured in this outlined function. If condition evaluates to false, the code executes serial version of the code by executing the following code: global_thread_id.addr = alloca i32 store i32 global_thread_id, global_thread_id.addr zero.addr = alloca i32 store i32 0, zero.addr kmpc_serialized_parallel(loc, global_thread_id); microtask(global_thread_id.addr, zero.addr, captured_struct/*context*/); kmpc_end_serialized_parallel(loc, global_thread_id); Where loc - debug location, global_thread_id - global thread id, returned by __kmpc_global_thread_num() call or passed as a first parameter in microtask() call, global_thread_id.addr - address of the variable, where stored global_thread_id value, zero.addr - implicit bound thread id (should be set to 0 for serial call), microtask() and captured_struct are the same as in parallel call. Also this patch checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false). Differential Revision: http://reviews.llvm.org/D4716 llvm-svn: 219597
2014-10-13 14:02:40 +08:00
else
CGM.getOpenMPRuntime().EmitOMPSerialCall(*this, S.getLocStart(),
OutlinedFn, CapturedStruct);
});
} else
EmitOMPParallelCall(*this, S, OutlinedFn, CapturedStruct);
}
/// \brief Emits one iteration of the loop associated with \a S: the counter
/// updates followed by the loop body.
///
/// A 'continue' inside the body jumps to the "omp.body.continue" block that
/// is emitted right after the body. No break destination is provided.
///
/// \param S Loop directive being emitted.
/// \param SeparateIter Currently has no effect on the emitted code; see the
/// TODO at the end of the function.
void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &S,
                                      bool SeparateIter) {
  RunCleanupsScope IterationScope(*this);

  // Bring the loop counters up to date for this iteration.
  for (auto UpdateExpr : S.updates())
    EmitIgnoredExpr(UpdateExpr);

  // 'continue' in the body lands on the block placed after the body.
  auto ContinueDest = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(JumpDest(), ContinueDest));

  // The body itself.
  EmitStmt(S.getBody());

  // End of the iteration (updates/cleanups).
  EmitBlock(ContinueDest.getBlock());
  BreakContinueStack.pop_back();

  if (SeparateIter) {
    // TODO: Update lastprivates if the SeparateIter flag is true.
    // This will be implemented in a follow-up OMPLastprivateClause patch, but
    // result should be still correct without it, as we do not make these
    // variables private yet.
  }
}
/// \brief Emits the inner loop of the loop directive \a S: a condition block,
/// the loop body, the increment and the back-edge to the condition.
///
/// \param S Loop directive being emitted.
/// \param LoopScope Private scope of the loop; if it requires cleanups, the
/// loop exit is staged through a separate cleanup block.
/// \param SeparateIter Passed to S.getCond() to select the loop condition.
/// The body is emitted through EmitOMPLoopBody(S), i.e. without forwarding
/// this flag.
void CodeGenFunction::EmitOMPInnerLoop(const OMPLoopDirective &S,
OMPPrivateScope &LoopScope,
bool SeparateIter) {
auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
auto Cnt = getPGORegionCounter(&S);
// Start the loop with a block that tests the condition.
auto CondBlock = createBasicBlock("omp.inner.for.cond");
EmitBlock(CondBlock);
LoopStack.push(CondBlock);
// If there are any cleanups between here and the loop-exit scope,
// create a block to stage a loop exit along.
auto ExitBlock = LoopExit.getBlock();
if (LoopScope.requiresCleanups())
ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
auto LoopBody = createBasicBlock("omp.inner.for.body");
// Emit condition: "IV < LastIteration + 1 [ - 1]"
// ("- 1" when lastprivate clause is present - separate one iteration).
llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond(SeparateIter));
Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock,
PGO.createLoopWeights(S.getCond(SeparateIter), Cnt));
// Only taken when a dedicated cleanup block was created above: leave the
// loop through the pending cleanups.
if (ExitBlock != LoopExit.getBlock()) {
EmitBlock(ExitBlock);
EmitBranchThroughCleanup(LoopExit);
}
EmitBlock(LoopBody);
Cnt.beginRegion(Builder);
// Create a block for the increment.
auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
EmitOMPLoopBody(S);
EmitStopPoint(&S);
// Emit "IV = IV + 1" and a back-edge to the condition block.
EmitBlock(Continue.getBlock());
EmitIgnoredExpr(S.getInc());
BreakContinueStack.pop_back();
EmitBranch(CondBlock);
LoopStack.pop();
// Emit the fall-through block.
EmitBlock(LoopExit.getBlock());
}
/// \brief Emits the final-value expressions of the loop directive \a S.
///
/// The final expressions are paired positionally with the loop counters; a
/// final expression is emitted only when its counter's declaration currently
/// has a non-null entry in LocalDeclMap.
void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &S) {
  auto CounterIt = S.counters().begin();
  for (auto FinalExpr : S.finals()) {
    // Fetch the counter paired with this final expression, then advance.
    auto CounterDecl = cast<DeclRefExpr>(*CounterIt)->getDecl();
    ++CounterIt;
    if (LocalDeclMap.lookup(CounterDecl))
      EmitIgnoredExpr(FinalExpr);
  }
}
/// \brief Emits alignment assumptions for the variables of an 'aligned'
/// clause.
///
/// The alignment given in the clause is used when present (it must evaluate
/// to a constant integer); otherwise the target's default SIMD alignment for
/// the variable's type is queried. Variables whose resulting alignment is 0
/// get no assumption.
///
/// \param CGF Function the code is emitted into.
/// \param CGM Module used to query target defaults.
/// \param Clause The 'aligned' clause.
static void EmitOMPAlignedClause(CodeGenFunction &CGF, CodeGenModule &CGM,
                                 const OMPAlignedClause &Clause) {
  unsigned ExplicitAlignment = 0;
  if (auto AlignExpr = Clause.getAlignment()) {
    auto AlignConst = cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignExpr));
    ExplicitAlignment = static_cast<unsigned>(AlignConst->getZExtValue());
  }

  for (auto VarExpr : Clause.varlists()) {
    // OpenMP [2.8.1, Description]
    // If no optional parameter is specified, implementation-defined default
    // alignments for SIMD instructions on the target platforms are assumed.
    unsigned Alignment =
        ExplicitAlignment != 0
            ? ExplicitAlignment
            : CGM.getTargetCodeGenInfo().getOpenMPSimdDefaultAlignment(
                  VarExpr->getType());
    assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
           "alignment is not power of 2");
    if (Alignment == 0)
      continue;
    CGF.EmitAlignmentAssumption(CGF.EmitScalarExpr(VarExpr), Alignment);
  }
}
/// \brief Registers a private, uninitialized copy for every loop counter in
/// \a Counters and then privatizes \a LoopScope.
///
/// \param CGF Function the code is emitted into.
/// \param LoopScope Scope that receives the counter mappings.
/// \param Counters References (DeclRefExpr) to the loop counter variables.
static void EmitPrivateLoopCounters(CodeGenFunction &CGF,
                                    CodeGenFunction::OMPPrivateScope &LoopScope,
                                    ArrayRef<Expr *> Counters) {
  for (auto *CounterRef : Counters) {
    auto CounterDecl = cast<VarDecl>(cast<DeclRefExpr>(CounterRef)->getDecl());
    // Allocates the counter without initializing it and registers cleanups.
    auto EmitPrivateCopy = [&]() -> llvm::Value * {
      auto Emission = CGF.EmitAutoVarAlloca(*CounterDecl);
      CGF.EmitAutoVarCleanups(Emission);
      return Emission.getAllocatedAddress();
    };
    bool IsRegistered = LoopScope.addPrivate(CounterDecl, EmitPrivateCopy);
    assert(IsRegistered && "counter already registered as private");
    // Only read by the assert; keep release builds warning-free.
    (void)IsRegistered;
  }
  (void)LoopScope.Privatize();
}
/// \brief Emits code for the '#pragma omp simd' directive \a S.
///
/// The clauses are scanned first: 'safelen' sets the vectorizer width and
/// clears the loop's parallel marking, 'aligned' emits alignment assumptions,
/// and 'lastprivate' requests that the last iteration be emitted separately.
/// The loop is then emitted inline (no outlined function) inside a lexical
/// debug block, followed by the final counter updates.
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
// Pragma 'simd' code depends on presence of 'lastprivate'.
// If present, we have to separate last iteration of the loop:
//
// if (LastIteration != 0) {
// for (IV in 0..LastIteration-1) BODY;
// BODY with updates of lastprivate vars;
// <Final counter/linear vars updates>;
// }
//
// otherwise (when there's no lastprivate):
//
// for (IV in 0..LastIteration) BODY;
// <Final counter/linear vars updates>;
//
// Walk clauses and process safelen/lastprivate.
bool SeparateIter = false;
LoopStack.setParallel();
LoopStack.setVectorizerEnable(true);
for (auto C : S.clauses()) {
switch (C->getClauseKind()) {
case OMPC_safelen: {
// The safelen value is required to be a constant integer here (cast<>).
RValue Len = EmitAnyExpr(cast<OMPSafelenClause>(C)->getSafelen(),
AggValueSlot::ignored(), true);
llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
LoopStack.setVectorizerWidth(Val->getZExtValue());
// In presence of finite 'safelen', it may be unsafe to mark all
// the memory instructions parallel, because loop-carried
// dependences of 'safelen' iterations are possible.
LoopStack.setParallel(false);
break;
}
case OMPC_aligned:
EmitOMPAlignedClause(*this, CGM, cast<OMPAlignedClause>(*C));
break;
case OMPC_lastprivate:
SeparateIter = true;
break;
default:
// Not handled yet
;
}
}
// Emit the associated statement inline rather than as an outlined function.
InlinedOpenMPRegion Region(*this, S.getAssociatedStmt());
RunCleanupsScope DirectiveScope(*this);
CGDebugInfo *DI = getDebugInfo();
if (DI)
DI->EmitLexicalBlockStart(Builder, S.getSourceRange().getBegin());
// Emit the loop iteration variable.
const Expr *IVExpr = S.getIterationVariable();
const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
EmitVarDecl(*IVDecl);
EmitIgnoredExpr(S.getInit());
// Emit the iterations count variable.
// If it is not a variable, Sema decided to calculate iterations count on each
// iteration (e.g., it is foldable into a constant).
if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
// Emit calculation of the iterations count.
EmitIgnoredExpr(S.getCalcLastIteration());
}
if (SeparateIter) {
// Emit: if (<loop pre-condition>) - begin.
RegionCounter Cnt = getPGORegionCounter(&S);
auto ThenBlock = createBasicBlock("simd.if.then");
auto ContBlock = createBasicBlock("simd.if.end");
EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount());
EmitBlock(ThenBlock);
Cnt.beginRegion(Builder);
// Emit 'then' code.
{
OMPPrivateScope LoopScope(*this);
EmitPrivateLoopCounters(*this, LoopScope, S.counters());
// All iterations but the last one, then the separated last iteration.
EmitOMPInnerLoop(S, LoopScope, /* SeparateIter */ true);
EmitOMPLoopBody(S, /* SeparateIter */ true);
}
EmitOMPSimdFinal(S);
// Emit: if (<loop pre-condition>) - end.
EmitBranch(ContBlock);
EmitBlock(ContBlock, true);
} else {
{
OMPPrivateScope LoopScope(*this);
EmitPrivateLoopCounters(*this, LoopScope, S.counters());
EmitOMPInnerLoop(S, LoopScope);
}
EmitOMPSimdFinal(S);
}
if (DI)
DI->EmitLexicalBlockEnd(Builder, S.getSourceRange().getEnd());
}
/// \brief Emits the declaration of the variable referenced by \a Helper and
/// returns the lvalue of that reference.
///
/// \param CGF Function the code is emitted into.
/// \param Helper Reference to the helper variable; its declaration must be a
/// VarDecl (enforced by cast<>).
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
                               const DeclRefExpr *Helper) {
  CGF.EmitVarDecl(*cast<VarDecl>(Helper->getDecl()));
  return CGF.EmitLValue(Helper);
}
/// \brief Emits the worksharing loop of the loop directive \a S.
///
/// The iteration variable and (if it is a variable) the iteration count are
/// emitted first. The rest is guarded by the loop pre-condition: helper
/// variables for lower bound, upper bound, stride and the is-last flag are
/// emitted, loop counters are privatized, and the schedule clause is
/// inspected. Only the schedule accepted by the runtime's
/// isStaticNonchunked() query is emitted; any other schedule is reported via
/// ErrorUnsupported.
void CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
// Emit the loop iteration variable.
auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
EmitVarDecl(*IVDecl);
// Emit the iterations count variable.
// If it is not a variable, Sema decided to calculate iterations count on each
// iteration (e.g., it is foldable into a constant).
if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
// Emit calculation of the iterations count.
EmitIgnoredExpr(S.getCalcLastIteration());
}
auto &RT = CGM.getOpenMPRuntime();
// Check pre-condition.
{
// Skip the entire loop if we don't meet the precondition.
RegionCounter Cnt = getPGORegionCounter(&S);
auto ThenBlock = createBasicBlock("omp.precond.then");
auto ContBlock = createBasicBlock("omp.precond.end");
EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount());
EmitBlock(ThenBlock);
Cnt.beginRegion(Builder);
// Emit 'then' code.
{
// Emit helper vars inits.
// LB/UB/ST/IL: lower bound, upper bound, stride and is-last-iteration flag.
LValue LB =
EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
LValue UB =
EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
LValue ST =
EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
LValue IL =
EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
OMPPrivateScope LoopScope(*this);
EmitPrivateLoopCounters(*this, LoopScope, S.counters());
// Detect the loop schedule kind and chunk.
auto ScheduleKind = OMPC_SCHEDULE_unknown;
llvm::Value *Chunk = nullptr;
if (auto C = cast_or_null<OMPScheduleClause>(
S.getSingleClause(OMPC_schedule))) {
ScheduleKind = C->getScheduleKind();
if (auto Ch = C->getChunkSize()) {
// The chunk size is converted to the type of the iteration variable.
Chunk = EmitScalarExpr(Ch);
Chunk = EmitScalarConversion(Chunk, Ch->getType(),
S.getIterationVariable()->getType());
}
}
const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
if (RT.isStaticNonchunked(ScheduleKind,
/* Chunked */ Chunk != nullptr)) {
// OpenMP [2.7.1, Loop Construct, Description, table 2-1]
// When no chunk_size is specified, the iteration space is divided into
// chunks that are approximately equal in size, and at most one chunk is
// distributed to each thread. Note that the size of the chunks is
// unspecified in this case.
RT.EmitOMPForInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
IL.getAddress(), LB.getAddress(), UB.getAddress(),
ST.getAddress());
// UB = min(UB, GlobalUB);
EmitIgnoredExpr(S.getEnsureUpperBound());
// IV = LB;
EmitIgnoredExpr(S.getInit());
// while (idx <= UB) { BODY; ++idx; }
EmitOMPInnerLoop(S, LoopScope);
// Tell the runtime we are done.
RT.EmitOMPForFinish(*this, S.getLocStart(), ScheduleKind);
} else
// Every other schedule is diagnosed as unsupported instead of being emitted.
ErrorUnsupported(&S, "OpenMP loop with requested schedule");
}
// We're now done with the loop, so jump to the continuation block.
EmitBranch(ContBlock);
EmitBlock(ContBlock, true);
}
}
void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
InlinedOpenMPRegion Region(*this, S.getAssociatedStmt());
RunCleanupsScope DirectiveScope(*this);
CGDebugInfo *DI = getDebugInfo();
if (DI)
DI->EmitLexicalBlockStart(Builder, S.getSourceRange().getBegin());
EmitOMPWorksharingLoop(S);
// Emit an implicit barrier at the end.
CGM.getOpenMPRuntime().EmitOMPBarrierCall(*this, S.getLocStart(),
/*IsExplicit*/ false);
if (DI)
DI->EmitLexicalBlockEnd(Builder, S.getSourceRange().getEnd());
}
void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &) {
llvm_unreachable("CodeGen for 'omp for simd' is not supported yet.");
}
void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &) {
llvm_unreachable("CodeGen for 'omp sections' is not supported yet.");
}
void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &) {
llvm_unreachable("CodeGen for 'omp section' is not supported yet.");
}
void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &) {
llvm_unreachable("CodeGen for 'omp single' is not supported yet.");
}
void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
CGM.getOpenMPRuntime().EmitOMPMasterRegion(*this, [&]() -> void {
InlinedOpenMPRegion Region(*this, S.getAssociatedStmt());
RunCleanupsScope Scope(*this);
EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
EnsureInsertPoint();
}, S.getLocStart());
}
void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
CGM.getOpenMPRuntime().EmitOMPCriticalRegion(
*this, S.getDirectiveName().getAsString(), [&]() -> void {
InlinedOpenMPRegion Region(*this, S.getAssociatedStmt());
RunCleanupsScope Scope(*this);
EmitStmt(
cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
EnsureInsertPoint();
}, S.getLocStart());
}
void
CodeGenFunction::EmitOMPParallelForDirective(const OMPParallelForDirective &) {
llvm_unreachable("CodeGen for 'omp parallel for' is not supported yet.");
}
void CodeGenFunction::EmitOMPParallelForSimdDirective(
const OMPParallelForSimdDirective &) {
llvm_unreachable("CodeGen for 'omp parallel for simd' is not supported yet.");
}
void CodeGenFunction::EmitOMPParallelSectionsDirective(
const OMPParallelSectionsDirective &) {
llvm_unreachable("CodeGen for 'omp parallel sections' is not supported yet.");
}
void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &) {
llvm_unreachable("CodeGen for 'omp task' is not supported yet.");
}
void CodeGenFunction::EmitOMPTaskyieldDirective(const OMPTaskyieldDirective &) {
llvm_unreachable("CodeGen for 'omp taskyield' is not supported yet.");
}
/// \brief Emits code for the '#pragma omp barrier' directive \a S by asking
/// the OpenMP runtime for a barrier call at the directive's start location.
void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  auto &Runtime = CGM.getOpenMPRuntime();
  Runtime.EmitOMPBarrierCall(*this, S.getLocStart());
}
void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &) {
llvm_unreachable("CodeGen for 'omp taskwait' is not supported yet.");
}
void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
CGM.getOpenMPRuntime().EmitOMPFlush(
*this, [&]() -> ArrayRef<const Expr *> {
if (auto C = S.getSingleClause(/*K*/ OMPC_flush)) {
auto FlushClause = cast<OMPFlushClause>(C);
return llvm::makeArrayRef(FlushClause->varlist_begin(),
FlushClause->varlist_end());
}
return llvm::None;
}(),
S.getLocStart());
}
void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &) {
llvm_unreachable("CodeGen for 'omp ordered' is not supported yet.");
}
void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &) {
llvm_unreachable("CodeGen for 'omp atomic' is not supported yet.");
}
void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &) {
llvm_unreachable("CodeGen for 'omp target' is not supported yet.");
}
void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) {
llvm_unreachable("CodeGen for 'omp teams' is not supported yet.");
}