2014-05-06 18:08:46 +08:00
|
|
|
//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
|
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2014-05-06 18:08:46 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// This contains code to emit OpenMP nodes as LLVM code.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2016-02-16 19:18:12 +08:00
|
|
|
#include "CGCleanup.h"
|
2014-05-06 18:08:46 +08:00
|
|
|
#include "CGOpenMPRuntime.h"
|
|
|
|
#include "CodeGenFunction.h"
|
|
|
|
#include "CodeGenModule.h"
|
2015-01-14 19:29:14 +08:00
|
|
|
#include "TargetInfo.h"
|
2014-05-06 18:08:46 +08:00
|
|
|
#include "clang/AST/Stmt.h"
|
|
|
|
#include "clang/AST/StmtOpenMP.h"
|
2016-03-03 11:52:24 +08:00
|
|
|
#include "clang/AST/DeclOpenMP.h"
|
2014-05-06 18:08:46 +08:00
|
|
|
using namespace clang;
|
|
|
|
using namespace CodeGen;
|
|
|
|
|
2016-02-16 19:18:12 +08:00
|
|
|
namespace {
|
|
|
|
/// Lexical scope for OpenMP executable constructs, that handles correct codegen
|
|
|
|
/// for captured expressions.
|
2017-01-19 04:40:48 +08:00
|
|
|
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
|
2016-02-16 19:18:12 +08:00
|
|
|
void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
|
|
|
|
for (const auto *C : S.clauses()) {
|
2018-04-14 01:31:06 +08:00
|
|
|
if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
|
|
|
|
if (const auto *PreInit =
|
|
|
|
cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
|
2016-03-03 11:52:24 +08:00
|
|
|
for (const auto *I : PreInit->decls()) {
|
2018-04-14 01:31:06 +08:00
|
|
|
if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
|
2016-03-03 11:52:24 +08:00
|
|
|
CGF.EmitVarDecl(cast<VarDecl>(*I));
|
2018-04-14 01:31:06 +08:00
|
|
|
} else {
|
2016-03-03 11:52:24 +08:00
|
|
|
CodeGenFunction::AutoVarEmission Emission =
|
|
|
|
CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
|
|
|
|
CGF.EmitAutoVarCleanups(Emission);
|
|
|
|
}
|
|
|
|
}
|
2016-02-16 19:18:12 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2016-04-27 15:56:03 +08:00
|
|
|
CodeGenFunction::OMPPrivateScope InlinedShareds;
|
|
|
|
|
|
|
|
static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
|
|
|
|
return CGF.LambdaCaptureFields.lookup(VD) ||
|
|
|
|
(CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
|
|
|
|
(CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
|
|
|
|
}
|
2016-02-16 19:18:12 +08:00
|
|
|
|
|
|
|
public:
|
2018-01-13 03:39:11 +08:00
|
|
|
OMPLexicalScope(
|
|
|
|
CodeGenFunction &CGF, const OMPExecutableDirective &S,
|
|
|
|
const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None,
|
|
|
|
const bool EmitPreInitStmt = true)
|
2016-04-27 15:56:03 +08:00
|
|
|
: CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
|
|
|
|
InlinedShareds(CGF) {
|
2017-01-19 04:40:48 +08:00
|
|
|
if (EmitPreInitStmt)
|
|
|
|
emitPreInitStmt(CGF, S);
|
2018-01-13 03:39:11 +08:00
|
|
|
if (!CapturedRegion.hasValue())
|
|
|
|
return;
|
|
|
|
assert(S.hasAssociatedStmt() &&
|
|
|
|
"Expected associated statement for inlined directive.");
|
|
|
|
const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
|
2018-04-14 01:31:06 +08:00
|
|
|
for (const auto &C : CS->captures()) {
|
2018-01-13 03:39:11 +08:00
|
|
|
if (C.capturesVariable() || C.capturesVariableByCopy()) {
|
|
|
|
auto *VD = C.getCapturedVar();
|
|
|
|
assert(VD == VD->getCanonicalDecl() &&
|
|
|
|
"Canonical decl must be captured.");
|
|
|
|
DeclRefExpr DRE(
|
2018-12-21 22:10:18 +08:00
|
|
|
CGF.getContext(), const_cast<VarDecl *>(VD),
|
2018-01-13 03:39:11 +08:00
|
|
|
isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
|
|
|
|
InlinedShareds.isGlobalVarCaptured(VD)),
|
2018-01-24 02:12:38 +08:00
|
|
|
VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
|
2018-01-13 03:39:11 +08:00
|
|
|
InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
|
|
|
|
return CGF.EmitLValue(&DRE).getAddress();
|
|
|
|
});
|
2016-04-27 15:56:03 +08:00
|
|
|
}
|
|
|
|
}
|
2018-01-13 03:39:11 +08:00
|
|
|
(void)InlinedShareds.Privatize();
|
2016-02-16 19:18:12 +08:00
|
|
|
}
|
|
|
|
};
|
2016-03-29 13:34:15 +08:00
|
|
|
|
2017-01-19 04:40:48 +08:00
|
|
|
/// Lexical scope for OpenMP parallel construct, that handles correct codegen
|
|
|
|
/// for captured expressions.
|
|
|
|
class OMPParallelScope final : public OMPLexicalScope {
|
|
|
|
bool EmitPreInitStmt(const OMPExecutableDirective &S) {
|
|
|
|
OpenMPDirectiveKind Kind = S.getDirectiveKind();
|
2017-04-26 01:52:12 +08:00
|
|
|
return !(isOpenMPTargetExecutionDirective(Kind) ||
|
|
|
|
isOpenMPLoopBoundSharingDirective(Kind)) &&
|
2017-01-19 04:40:48 +08:00
|
|
|
isOpenMPParallelDirective(Kind);
|
|
|
|
}
|
|
|
|
|
|
|
|
public:
|
|
|
|
OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
|
2018-01-13 03:39:11 +08:00
|
|
|
: OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
|
|
|
|
EmitPreInitStmt(S)) {}
|
2017-01-19 04:40:48 +08:00
|
|
|
};
|
|
|
|
|
2017-01-25 10:18:43 +08:00
|
|
|
/// Lexical scope for OpenMP teams construct, that handles correct codegen
|
|
|
|
/// for captured expressions.
|
|
|
|
class OMPTeamsScope final : public OMPLexicalScope {
|
|
|
|
bool EmitPreInitStmt(const OMPExecutableDirective &S) {
|
|
|
|
OpenMPDirectiveKind Kind = S.getDirectiveKind();
|
|
|
|
return !isOpenMPTargetExecutionDirective(Kind) &&
|
|
|
|
isOpenMPTeamsDirective(Kind);
|
|
|
|
}
|
|
|
|
|
|
|
|
public:
|
|
|
|
OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
|
2018-01-13 03:39:11 +08:00
|
|
|
: OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
|
|
|
|
EmitPreInitStmt(S)) {}
|
2017-01-25 10:18:43 +08:00
|
|
|
};
|
|
|
|
|
2016-03-29 16:58:54 +08:00
|
|
|
/// Private scope for OpenMP loop-based directives, that supports capturing
|
|
|
|
/// of used expression from loop statement.
|
|
|
|
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
|
|
|
|
void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
|
2018-03-08 02:17:06 +08:00
|
|
|
CodeGenFunction::OMPMapVars PreCondVars;
|
2019-09-19 03:24:07 +08:00
|
|
|
llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
|
2018-04-14 01:31:06 +08:00
|
|
|
for (const auto *E : S.counters()) {
|
2017-12-09 04:18:58 +08:00
|
|
|
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
|
2019-09-19 03:24:07 +08:00
|
|
|
EmittedAsPrivate.insert(VD->getCanonicalDecl());
|
2018-03-08 02:17:06 +08:00
|
|
|
(void)PreCondVars.setVarAddr(
|
|
|
|
CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
|
2017-12-09 04:18:58 +08:00
|
|
|
}
|
2019-09-19 03:24:07 +08:00
|
|
|
// Mark private vars as undefs.
|
|
|
|
for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
|
|
|
|
for (const Expr *IRef : C->varlists()) {
|
|
|
|
const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
|
|
|
|
if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
|
|
|
|
(void)PreCondVars.setVarAddr(
|
|
|
|
CGF, OrigVD,
|
|
|
|
Address(llvm::UndefValue::get(
|
|
|
|
CGF.ConvertTypeForMem(CGF.getContext().getPointerType(
|
|
|
|
OrigVD->getType().getNonReferenceType()))),
|
|
|
|
CGF.getContext().getDeclAlign(OrigVD)));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2018-03-08 02:17:06 +08:00
|
|
|
(void)PreCondVars.apply(CGF);
|
2018-04-14 01:31:06 +08:00
|
|
|
if (const auto *PreInits = cast_or_null<DeclStmt>(S.getPreInits())) {
|
2018-03-01 13:43:23 +08:00
|
|
|
for (const auto *I : PreInits->decls())
|
|
|
|
CGF.EmitVarDecl(cast<VarDecl>(*I));
|
2016-03-29 16:58:54 +08:00
|
|
|
}
|
2018-03-08 02:17:06 +08:00
|
|
|
PreCondVars.restore(CGF);
|
2016-03-29 16:58:54 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
public:
|
|
|
|
OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
|
|
|
|
: CodeGenFunction::RunCleanupsScope(CGF) {
|
|
|
|
emitPreInitStmt(CGF, S);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2017-12-30 02:07:07 +08:00
|
|
|
class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
|
|
|
|
CodeGenFunction::OMPPrivateScope InlinedShareds;
|
|
|
|
|
|
|
|
static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
|
|
|
|
return CGF.LambdaCaptureFields.lookup(VD) ||
|
|
|
|
(CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
|
|
|
|
(CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
|
|
|
|
cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
|
|
|
|
}
|
|
|
|
|
|
|
|
public:
|
|
|
|
OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
|
|
|
|
: CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
|
|
|
|
InlinedShareds(CGF) {
|
|
|
|
for (const auto *C : S.clauses()) {
|
2018-04-14 01:31:06 +08:00
|
|
|
if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
|
|
|
|
if (const auto *PreInit =
|
|
|
|
cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
|
2017-12-30 02:07:07 +08:00
|
|
|
for (const auto *I : PreInit->decls()) {
|
2018-04-14 01:31:06 +08:00
|
|
|
if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
|
2017-12-30 02:07:07 +08:00
|
|
|
CGF.EmitVarDecl(cast<VarDecl>(*I));
|
2018-04-14 01:31:06 +08:00
|
|
|
} else {
|
2017-12-30 02:07:07 +08:00
|
|
|
CodeGenFunction::AutoVarEmission Emission =
|
|
|
|
CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
|
|
|
|
CGF.EmitAutoVarCleanups(Emission);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
|
|
|
|
for (const Expr *E : UDP->varlists()) {
|
|
|
|
const Decl *D = cast<DeclRefExpr>(E)->getDecl();
|
|
|
|
if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
|
|
|
|
CGF.EmitVarDecl(*OED);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!isOpenMPSimdDirective(S.getDirectiveKind()))
|
|
|
|
CGF.EmitOMPPrivateClause(S, InlinedShareds);
|
|
|
|
if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
|
|
|
|
if (const Expr *E = TG->getReductionRef())
|
|
|
|
CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
|
|
|
|
}
|
|
|
|
const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
|
|
|
|
while (CS) {
|
|
|
|
for (auto &C : CS->captures()) {
|
|
|
|
if (C.capturesVariable() || C.capturesVariableByCopy()) {
|
|
|
|
auto *VD = C.getCapturedVar();
|
|
|
|
assert(VD == VD->getCanonicalDecl() &&
|
|
|
|
"Canonical decl must be captured.");
|
2018-12-21 22:10:18 +08:00
|
|
|
DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
|
2017-12-30 02:07:07 +08:00
|
|
|
isCapturedVar(CGF, VD) ||
|
|
|
|
(CGF.CapturedStmtInfo &&
|
|
|
|
InlinedShareds.isGlobalVarCaptured(VD)),
|
|
|
|
VD->getType().getNonReferenceType(), VK_LValue,
|
2018-01-24 02:12:38 +08:00
|
|
|
C.getLocation());
|
2017-12-30 02:07:07 +08:00
|
|
|
InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
|
|
|
|
return CGF.EmitLValue(&DRE).getAddress();
|
|
|
|
});
|
|
|
|
}
|
|
|
|
}
|
|
|
|
CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
|
|
|
|
}
|
|
|
|
(void)InlinedShareds.Privatize();
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2016-02-16 19:18:12 +08:00
|
|
|
} // namespace
|
|
|
|
|
2017-11-18 01:57:25 +08:00
|
|
|
/// Forward declaration of a file-local helper; its definition is outside the
/// visible part of this file.
static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
|
|
|
|
const OMPExecutableDirective &S,
|
|
|
|
const RegionCodeGenTy &CodeGen);
|
|
|
|
|
2017-09-26 21:47:31 +08:00
|
|
|
/// Emits an lvalue for \p E. When \p E is a direct reference to a variable, a
/// fresh reference to the canonical declaration is emitted instead, flagged as
/// referring to a capture if the variable is a lambda capture field, is known
/// to the current captured statement info, or the current code declaration is
/// a block. Any other expression is emitted unchanged.
LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
  const auto *OrigDRE = dyn_cast<DeclRefExpr>(E);
  if (!OrigDRE)
    return EmitLValue(E);

  const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl());
  if (!OrigVD)
    return EmitLValue(E);

  const VarDecl *CanonVD = OrigVD->getCanonicalDecl();
  const bool RefersToCapture =
      LambdaCaptureFields.lookup(CanonVD) ||
      (CapturedStmtInfo && CapturedStmtInfo->lookup(CanonVD)) ||
      (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
  DeclRefExpr SharedRef(getContext(), const_cast<VarDecl *>(CanonVD),
                        RefersToCapture, OrigDRE->getType(), VK_LValue,
                        OrigDRE->getExprLoc());
  return EmitLValue(&SharedRef);
}
|
|
|
|
|
2016-01-26 20:20:39 +08:00
|
|
|
/// Returns the size of \p Ty in chars as an LLVM value. Types with a non-zero
/// static size yield a constant; for variable-length arrays the element
/// counts of all VLA dimensions are multiplied with the size of the innermost
/// element type.
llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  ASTContext &Ctx = getContext();
  auto StaticSize = Ctx.getTypeSizeInChars(Ty);
  if (!StaticSize.isZero())
    return CGM.getSize(StaticSize);

  // getTypeSizeInChars() returns 0 for a VLA: peel off the VLA dimensions and
  // accumulate the product of their element counts.
  llvm::Value *NumElts = nullptr;
  while (const VariableArrayType *VAT = Ctx.getAsVariableArrayType(Ty)) {
    VlaSizePair Dim = getVLASize(VAT);
    Ty = Dim.Type;
    if (NumElts)
      NumElts = Builder.CreateNUWMul(NumElts, Dim.NumElts);
    else
      NumElts = Dim.NumElts;
  }

  auto ElemSize = Ctx.getTypeSizeInChars(Ty);
  if (ElemSize.isZero())
    return llvm::ConstantInt::get(SizeTy, /*V=*/0);
  return Builder.CreateNUWMul(NumElts, CGM.getSize(ElemSize));
}
|
|
|
|
|
2015-09-10 16:12:02 +08:00
|
|
|
void CodeGenFunction::GenerateOpenMPCapturedVars(
|
2015-12-03 01:44:43 +08:00
|
|
|
const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
|
2015-09-10 16:12:02 +08:00
|
|
|
const RecordDecl *RD = S.getCapturedRecordDecl();
|
|
|
|
auto CurField = RD->field_begin();
|
|
|
|
auto CurCap = S.captures().begin();
|
|
|
|
for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
|
|
|
|
E = S.capture_init_end();
|
|
|
|
I != E; ++I, ++CurField, ++CurCap) {
|
|
|
|
if (CurField->hasCapturedVLAType()) {
|
2018-04-14 01:31:06 +08:00
|
|
|
const VariableArrayType *VAT = CurField->getCapturedVLAType();
|
|
|
|
llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
|
2015-10-03 00:14:20 +08:00
|
|
|
CapturedVars.push_back(Val);
|
2018-04-14 01:31:06 +08:00
|
|
|
} else if (CurCap->capturesThis()) {
|
2015-09-10 16:12:02 +08:00
|
|
|
CapturedVars.push_back(CXXThisValue);
|
2018-04-14 01:31:06 +08:00
|
|
|
} else if (CurCap->capturesVariableByCopy()) {
|
2018-01-24 02:44:14 +08:00
|
|
|
llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());
|
2016-06-17 02:39:34 +08:00
|
|
|
|
|
|
|
// If the field is not a pointer, we need to save the actual value
|
|
|
|
// and load it as a void pointer.
|
|
|
|
if (!CurField->getType()->isAnyPointerType()) {
|
2018-04-14 01:31:06 +08:00
|
|
|
ASTContext &Ctx = getContext();
|
|
|
|
Address DstAddr = CreateMemTemp(
|
2016-06-17 02:39:34 +08:00
|
|
|
Ctx.getUIntPtrType(),
|
2018-04-14 01:31:06 +08:00
|
|
|
Twine(CurCap->getCapturedVar()->getName(), ".casted"));
|
2016-06-17 02:39:34 +08:00
|
|
|
LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
|
|
|
|
|
2018-04-14 01:31:06 +08:00
|
|
|
llvm::Value *SrcAddrVal = EmitScalarConversion(
|
2016-06-17 02:39:34 +08:00
|
|
|
DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
|
2018-01-24 02:12:38 +08:00
|
|
|
Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
|
2016-06-17 02:39:34 +08:00
|
|
|
LValue SrcLV =
|
|
|
|
MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());
|
|
|
|
|
|
|
|
// Store the value using the source type pointer.
|
|
|
|
EmitStoreThroughLValue(RValue::get(CV), SrcLV);
|
|
|
|
|
|
|
|
// Load the value using the destination type pointer.
|
2018-01-24 02:44:14 +08:00
|
|
|
CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
|
2016-06-17 02:39:34 +08:00
|
|
|
}
|
|
|
|
CapturedVars.push_back(CV);
|
|
|
|
} else {
|
2015-12-03 01:44:43 +08:00
|
|
|
assert(CurCap->capturesVariable() && "Expected capture by reference.");
|
2015-09-10 16:12:02 +08:00
|
|
|
CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
|
2015-12-03 01:44:43 +08:00
|
|
|
}
|
2015-09-10 16:12:02 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-01-24 02:12:38 +08:00
|
|
|
/// Converts the pointer of the address of \p AddrLV from the uintptr type to
/// a pointer to \p DstType (via EmitScalarConversion at \p Loc) and returns
/// the address of the natural-alignment lvalue built from the result.
/// \p Name is not used by the body.
static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
                                    QualType DstType, StringRef Name,
                                    LValue AddrLV) {
  ASTContext &Ctx = CGF.getContext();
  llvm::Value *RawPtr = AddrLV.getAddress().getPointer();
  llvm::Value *TypedPtr = CGF.EmitScalarConversion(
      RawPtr, Ctx.getUIntPtrType(), Ctx.getPointerType(DstType), Loc);
  return CGF.MakeNaturalAlignAddrLValue(TypedPtr, Ctx.getPointerType(DstType))
      .getAddress();
}
|
|
|
|
|
2017-04-11 03:16:45 +08:00
|
|
|
/// Computes the canonical parameter type for \p T, recursing through lvalue
/// references, pointers and variable-length arrays:
///  - lvalue reference: lvalue reference to the canonicalized referee;
///  - pointer: pointer to the canonicalized pointee;
///  - VLA: the canonicalized element type;
///  - array that is not variably modified: the canonical type of \p T;
///  - anything else: ASTContext::getCanonicalParamType.
static QualType getCanonicalParamType(ASTContext &C, QualType T) {
  if (T->isLValueReferenceType()) {
    QualType Referee = getCanonicalParamType(C, T.getNonReferenceType());
    return C.getLValueReferenceType(Referee, /*SpelledAsLValue=*/false);
  }

  if (T->isPointerType()) {
    QualType Pointee = getCanonicalParamType(C, T->getPointeeType());
    return C.getPointerType(Pointee);
  }

  const ArrayType *ArrTy = T->getAsArrayTypeUnsafe();
  if (ArrTy) {
    if (const auto *VLA = dyn_cast<VariableArrayType>(ArrTy))
      return getCanonicalParamType(C, VLA->getElementType());
    if (!ArrTy->isVariablyModifiedType())
      return C.getCanonicalType(T);
  }

  return C.getCanonicalParamType(T);
}
|
|
|
|
|
[OPENMP][DEBUG] Generate second function with correct arg types.
Currently, if some of the parameters are captured by value, the
argument is converted to uintptr_t type and thus we lose the debug
info about the real type of the argument (captured variable):
```
void @.outlined_function.(uintptr %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.outlined_function.(uintptr %a.casted.val)
...
```
To resolve this problem, in debug mode a special external wrapper
function is generated, that calls the outlined function with the correct
parameter types:
```
void @.wrapper.(uintptr %par) {
%a = alloca i32
%cast = bitcast i32* %a to uintptr*
store uintptr %par, uintptr *%cast
%a.val = load i32, i32* %a
call void @.outlined_function.(i32 %a)
ret void
}
void @.outlined_function.(i32 %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.wrapper.(uintptr %a.casted.val)
...
```
llvm-svn: 306697
2017-06-30 00:43:05 +08:00
|
|
|
namespace {
|
|
|
|
/// Contains required data for proper outlined function codegen.
|
|
|
|
struct FunctionOptions {
|
|
|
|
/// Captured statement for which the function is generated.
|
|
|
|
const CapturedStmt *S = nullptr;
|
|
|
|
/// true if cast to/from UIntPtr is required for variables captured by
|
|
|
|
/// value.
|
2017-08-09 02:04:06 +08:00
|
|
|
const bool UIntPtrCastRequired = true;
|
2017-08-10 03:38:53 +08:00
|
|
|
/// true if only casted arguments must be registered as local args or VLA
|
[OPENMP][DEBUG] Generate second function with correct arg types.
Currently, if the some of the parameters are captured by value, this
argument is converted to uintptr_t type and thus we loosing the debug
info about real type of the argument (captured variable):
```
void @.outlined_function.(uintptr %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.outlined_function.(uintptr %a.casted.val)
...
```
To resolve this problem, in debug mode a speciall external wrapper
function is generated, that calls the outlined function with the correct
parameters types:
```
void @.wrapper.(uintptr %par) {
%a = alloca i32
%cast = bitcast i32* %a to uintptr*
store uintptr %par, uintptr *%cast
%a.val = load i32, i32* %a
call void @.outlined_function.(i32 %a)
ret void
}
void @.outlined_function.(i32 %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.wrapper.(uintptr %a.casted.val)
...
```
llvm-svn: 306697
2017-06-30 00:43:05 +08:00
|
|
|
/// sizes.
|
2017-08-09 02:04:06 +08:00
|
|
|
const bool RegisterCastedArgsOnly = false;
|
[OPENMP][DEBUG] Generate second function with correct arg types.
Currently, if the some of the parameters are captured by value, this
argument is converted to uintptr_t type and thus we loosing the debug
info about real type of the argument (captured variable):
```
void @.outlined_function.(uintptr %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.outlined_function.(uintptr %a.casted.val)
...
```
To resolve this problem, in debug mode a speciall external wrapper
function is generated, that calls the outlined function with the correct
parameters types:
```
void @.wrapper.(uintptr %par) {
%a = alloca i32
%cast = bitcast i32* %a to uintptr*
store uintptr %par, uintptr *%cast
%a.val = load i32, i32* %a
call void @.outlined_function.(i32 %a)
ret void
}
void @.outlined_function.(i32 %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.wrapper.(uintptr %a.casted.val)
...
```
llvm-svn: 306697
2017-06-30 00:43:05 +08:00
|
|
|
/// Name of the generated function.
|
2017-08-09 02:04:06 +08:00
|
|
|
const StringRef FunctionName;
|
[OPENMP][DEBUG] Generate second function with correct arg types.
Currently, if the some of the parameters are captured by value, this
argument is converted to uintptr_t type and thus we loosing the debug
info about real type of the argument (captured variable):
```
void @.outlined_function.(uintptr %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.outlined_function.(uintptr %a.casted.val)
...
```
To resolve this problem, in debug mode a speciall external wrapper
function is generated, that calls the outlined function with the correct
parameters types:
```
void @.wrapper.(uintptr %par) {
%a = alloca i32
%cast = bitcast i32* %a to uintptr*
store uintptr %par, uintptr *%cast
%a.val = load i32, i32* %a
call void @.outlined_function.(i32 %a)
ret void
}
void @.outlined_function.(i32 %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.wrapper.(uintptr %a.casted.val)
...
```
llvm-svn: 306697
2017-06-30 00:43:05 +08:00
|
|
|
explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
|
|
|
|
bool RegisterCastedArgsOnly,
|
2017-08-09 00:45:36 +08:00
|
|
|
StringRef FunctionName)
|
[OPENMP][DEBUG] Generate second function with correct arg types.
Currently, if the some of the parameters are captured by value, this
argument is converted to uintptr_t type and thus we loosing the debug
info about real type of the argument (captured variable):
```
void @.outlined_function.(uintptr %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.outlined_function.(uintptr %a.casted.val)
...
```
To resolve this problem, in debug mode a speciall external wrapper
function is generated, that calls the outlined function with the correct
parameters types:
```
void @.wrapper.(uintptr %par) {
%a = alloca i32
%cast = bitcast i32* %a to uintptr*
store uintptr %par, uintptr *%cast
%a.val = load i32, i32* %a
call void @.outlined_function.(i32 %a)
ret void
}
void @.outlined_function.(i32 %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.wrapper.(uintptr %a.casted.val)
...
```
llvm-svn: 306697
2017-06-30 00:43:05 +08:00
|
|
|
: S(S), UIntPtrCastRequired(UIntPtrCastRequired),
|
|
|
|
RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
|
2017-08-09 00:45:36 +08:00
|
|
|
FunctionName(FunctionName) {}
|
[OPENMP][DEBUG] Generate second function with correct arg types.
Currently, if the some of the parameters are captured by value, this
argument is converted to uintptr_t type and thus we loosing the debug
info about real type of the argument (captured variable):
```
void @.outlined_function.(uintptr %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.outlined_function.(uintptr %a.casted.val)
...
```
To resolve this problem, in debug mode a speciall external wrapper
function is generated, that calls the outlined function with the correct
parameters types:
```
void @.wrapper.(uintptr %par) {
%a = alloca i32
%cast = bitcast i32* %a to uintptr*
store uintptr %par, uintptr *%cast
%a.val = load i32, i32* %a
call void @.outlined_function.(i32 %a)
ret void
}
void @.outlined_function.(i32 %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.wrapper.(uintptr %a.casted.val)
...
```
llvm-svn: 306697
2017-06-30 00:43:05 +08:00
|
|
|
};
|
|
|
|
}
|
|
|
|
|
2017-08-10 03:38:53 +08:00
|
|
|
static llvm::Function *emitOutlinedFunctionPrologue(
|
[OPENMP][DEBUG] Generate second function with correct arg types.
Currently, if the some of the parameters are captured by value, this
argument is converted to uintptr_t type and thus we loosing the debug
info about real type of the argument (captured variable):
```
void @.outlined_function.(uintptr %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.outlined_function.(uintptr %a.casted.val)
...
```
To resolve this problem, in debug mode a speciall external wrapper
function is generated, that calls the outlined function with the correct
parameters types:
```
void @.wrapper.(uintptr %par) {
%a = alloca i32
%cast = bitcast i32* %a to uintptr*
store uintptr %par, uintptr *%cast
%a.val = load i32, i32* %a
call void @.outlined_function.(i32 %a)
ret void
}
void @.outlined_function.(i32 %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.wrapper.(uintptr %a.casted.val)
...
```
llvm-svn: 306697
2017-06-30 00:43:05 +08:00
|
|
|
CodeGenFunction &CGF, FunctionArgList &Args,
|
2017-08-09 02:04:06 +08:00
|
|
|
llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
|
[OPENMP][DEBUG] Generate second function with correct arg types.
Currently, if the some of the parameters are captured by value, this
argument is converted to uintptr_t type and thus we loosing the debug
info about real type of the argument (captured variable):
```
void @.outlined_function.(uintptr %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.outlined_function.(uintptr %a.casted.val)
...
```
To resolve this problem, in debug mode a speciall external wrapper
function is generated, that calls the outlined function with the correct
parameters types:
```
void @.wrapper.(uintptr %par) {
%a = alloca i32
%cast = bitcast i32* %a to uintptr*
store uintptr %par, uintptr *%cast
%a.val = load i32, i32* %a
call void @.outlined_function.(i32 %a)
ret void
}
void @.outlined_function.(i32 %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.wrapper.(uintptr %a.casted.val)
...
```
llvm-svn: 306697
2017-06-30 00:43:05 +08:00
|
|
|
&LocalAddrs,
|
|
|
|
llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
|
|
|
|
&VLASizes,
|
|
|
|
llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
|
|
|
|
const CapturedDecl *CD = FO.S->getCapturedDecl();
|
|
|
|
const RecordDecl *RD = FO.S->getCapturedRecordDecl();
|
2015-09-10 16:12:02 +08:00
|
|
|
assert(CD->hasBody() && "missing CapturedDecl body");
|
|
|
|
|
[OPENMP][DEBUG] Generate second function with correct arg types.
Currently, if the some of the parameters are captured by value, this
argument is converted to uintptr_t type and thus we loosing the debug
info about real type of the argument (captured variable):
```
void @.outlined_function.(uintptr %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.outlined_function.(uintptr %a.casted.val)
...
```
To resolve this problem, in debug mode a speciall external wrapper
function is generated, that calls the outlined function with the correct
parameters types:
```
void @.wrapper.(uintptr %par) {
%a = alloca i32
%cast = bitcast i32* %a to uintptr*
store uintptr %par, uintptr *%cast
%a.val = load i32, i32* %a
call void @.outlined_function.(i32 %a)
ret void
}
void @.outlined_function.(i32 %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.wrapper.(uintptr %a.casted.val)
...
```
llvm-svn: 306697
2017-06-30 00:43:05 +08:00
|
|
|
CXXThisValue = nullptr;
|
2015-09-10 16:12:02 +08:00
|
|
|
// Build the argument list.
|
[OPENMP][DEBUG] Generate second function with correct arg types.
Currently, if the some of the parameters are captured by value, this
argument is converted to uintptr_t type and thus we loosing the debug
info about real type of the argument (captured variable):
```
void @.outlined_function.(uintptr %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.outlined_function.(uintptr %a.casted.val)
...
```
To resolve this problem, in debug mode a speciall external wrapper
function is generated, that calls the outlined function with the correct
parameters types:
```
void @.wrapper.(uintptr %par) {
%a = alloca i32
%cast = bitcast i32* %a to uintptr*
store uintptr %par, uintptr *%cast
%a.val = load i32, i32* %a
call void @.outlined_function.(i32 %a)
ret void
}
void @.outlined_function.(i32 %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.wrapper.(uintptr %a.casted.val)
...
```
llvm-svn: 306697
2017-06-30 00:43:05 +08:00
|
|
|
CodeGenModule &CGM = CGF.CGM;
|
2015-09-10 16:12:02 +08:00
|
|
|
ASTContext &Ctx = CGM.getContext();
|
2017-08-09 02:04:06 +08:00
|
|
|
FunctionArgList TargetArgs;
|
2015-09-10 16:12:02 +08:00
|
|
|
Args.append(CD->param_begin(),
|
|
|
|
std::next(CD->param_begin(), CD->getContextParamPosition()));
|
2017-08-09 02:04:06 +08:00
|
|
|
TargetArgs.append(
|
|
|
|
CD->param_begin(),
|
|
|
|
std::next(CD->param_begin(), CD->getContextParamPosition()));
|
[OPENMP][DEBUG] Generate second function with correct arg types.
Currently, if some of the parameters are captured by value, the
argument is converted to uintptr_t type and thus we lose the debug
info about the real type of the argument (captured variable):
```
void @.outlined_function.(uintptr %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.outlined_function.(uintptr %a.casted.val)
...
```
To resolve this problem, in debug mode a special external wrapper
function is generated that calls the outlined function with the correct
parameter types:
```
void @.wrapper.(uintptr %par) {
%a = alloca i32
%cast = bitcast i32* %a to uintptr*
store uintptr %par, uintptr *%cast
%a.val = load i32, i32* %a
call void @.outlined_function.(i32 %a)
ret void
}
void @.outlined_function.(i32 %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.wrapper.(uintptr %a.casted.val)
...
```
llvm-svn: 306697
2017-06-30 00:43:05 +08:00
|
|
|
auto I = FO.S->captures().begin();
|
2017-11-23 00:02:03 +08:00
|
|
|
FunctionDecl *DebugFunctionDecl = nullptr;
|
|
|
|
if (!FO.UIntPtrCastRequired) {
|
|
|
|
FunctionProtoType::ExtProtoInfo EPI;
|
2018-11-11 08:56:15 +08:00
|
|
|
QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI);
|
2017-11-23 00:02:03 +08:00
|
|
|
DebugFunctionDecl = FunctionDecl::Create(
|
2018-08-10 05:08:08 +08:00
|
|
|
Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
|
2018-11-11 08:56:15 +08:00
|
|
|
SourceLocation(), DeclarationName(), FunctionTy,
|
|
|
|
Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
|
|
|
|
/*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false);
|
2017-11-23 00:02:03 +08:00
|
|
|
}
|
2018-04-14 01:31:06 +08:00
|
|
|
for (const FieldDecl *FD : RD->fields()) {
|
2015-09-10 16:12:02 +08:00
|
|
|
QualType ArgType = FD->getType();
|
|
|
|
IdentifierInfo *II = nullptr;
|
|
|
|
VarDecl *CapVar = nullptr;
|
2015-12-03 01:44:43 +08:00
|
|
|
|
|
|
|
// If this is a capture by copy and the type is not a pointer, the outlined
|
|
|
|
// function argument type should be uintptr and the value properly casted to
|
|
|
|
// uintptr. This is necessary given that the runtime library is only able to
|
|
|
|
// deal with pointers. We can pass in the same way the VLA type sizes to the
|
|
|
|
// outlined function.
|
2018-04-14 01:31:06 +08:00
|
|
|
if (FO.UIntPtrCastRequired &&
|
|
|
|
((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
|
|
|
|
I->capturesVariableArrayType()))
|
|
|
|
ArgType = Ctx.getUIntPtrType();
|
2015-12-03 01:44:43 +08:00
|
|
|
|
|
|
|
if (I->capturesVariable() || I->capturesVariableByCopy()) {
|
2015-09-10 16:12:02 +08:00
|
|
|
CapVar = I->getCapturedVar();
|
|
|
|
II = CapVar->getIdentifier();
|
2018-04-14 01:31:06 +08:00
|
|
|
} else if (I->capturesThis()) {
|
[OPENMP][DEBUG] Generate second function with correct arg types.
Currently, if some of the parameters are captured by value, the
argument is converted to uintptr_t type and thus we lose the debug
info about the real type of the argument (captured variable):
```
void @.outlined_function.(uintptr %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.outlined_function.(uintptr %a.casted.val)
...
```
To resolve this problem, in debug mode a special external wrapper
function is generated that calls the outlined function with the correct
parameter types:
```
void @.wrapper.(uintptr %par) {
%a = alloca i32
%cast = bitcast i32* %a to uintptr*
store uintptr %par, uintptr *%cast
%a.val = load i32, i32* %a
call void @.outlined_function.(i32 %a)
ret void
}
void @.outlined_function.(i32 %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.wrapper.(uintptr %a.casted.val)
...
```
llvm-svn: 306697
2017-06-30 00:43:05 +08:00
|
|
|
II = &Ctx.Idents.get("this");
|
2018-04-14 01:31:06 +08:00
|
|
|
} else {
|
2015-09-10 16:12:02 +08:00
|
|
|
assert(I->capturesVariableArrayType());
|
[OPENMP][DEBUG] Generate second function with correct arg types.
Currently, if some of the parameters are captured by value, the
argument is converted to uintptr_t type and thus we lose the debug
info about the real type of the argument (captured variable):
```
void @.outlined_function.(uintptr %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.outlined_function.(uintptr %a.casted.val)
...
```
To resolve this problem, in debug mode a special external wrapper
function is generated that calls the outlined function with the correct
parameter types:
```
void @.wrapper.(uintptr %par) {
%a = alloca i32
%cast = bitcast i32* %a to uintptr*
store uintptr %par, uintptr *%cast
%a.val = load i32, i32* %a
call void @.outlined_function.(i32 %a)
ret void
}
void @.outlined_function.(i32 %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.wrapper.(uintptr %a.casted.val)
...
```
llvm-svn: 306697
2017-06-30 00:43:05 +08:00
|
|
|
II = &Ctx.Idents.get("vla");
|
2015-09-10 16:12:02 +08:00
|
|
|
}
|
[OPENMP][DEBUG] Generate second function with correct arg types.
Currently, if some of the parameters are captured by value, the
argument is converted to uintptr_t type and thus we lose the debug
info about the real type of the argument (captured variable):
```
void @.outlined_function.(uintptr %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.outlined_function.(uintptr %a.casted.val)
...
```
To resolve this problem, in debug mode a special external wrapper
function is generated that calls the outlined function with the correct
parameter types:
```
void @.wrapper.(uintptr %par) {
%a = alloca i32
%cast = bitcast i32* %a to uintptr*
store uintptr %par, uintptr *%cast
%a.val = load i32, i32* %a
call void @.outlined_function.(i32 %a)
ret void
}
void @.outlined_function.(i32 %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.wrapper.(uintptr %a.casted.val)
...
```
llvm-svn: 306697
2017-06-30 00:43:05 +08:00
|
|
|
if (ArgType->isVariablyModifiedType())
|
2017-10-25 03:52:31 +08:00
|
|
|
ArgType = getCanonicalParamType(Ctx, ArgType);
|
2017-11-23 00:02:03 +08:00
|
|
|
VarDecl *Arg;
|
|
|
|
if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
|
|
|
|
Arg = ParmVarDecl::Create(
|
|
|
|
Ctx, DebugFunctionDecl,
|
2018-08-10 05:08:08 +08:00
|
|
|
CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
|
2017-11-23 00:02:03 +08:00
|
|
|
CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
|
|
|
|
/*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
|
|
|
|
} else {
|
|
|
|
Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
|
|
|
|
II, ArgType, ImplicitParamDecl::Other);
|
|
|
|
}
|
2017-08-09 02:04:06 +08:00
|
|
|
Args.emplace_back(Arg);
|
|
|
|
// Do not cast arguments if we emit function with non-original types.
|
|
|
|
TargetArgs.emplace_back(
|
|
|
|
FO.UIntPtrCastRequired
|
|
|
|
? Arg
|
|
|
|
: CGM.getOpenMPRuntime().translateParameter(FD, Arg));
|
2015-09-10 16:12:02 +08:00
|
|
|
++I;
|
|
|
|
}
|
|
|
|
Args.append(
|
|
|
|
std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
|
|
|
|
CD->param_end());
|
2017-08-09 02:04:06 +08:00
|
|
|
TargetArgs.append(
|
|
|
|
std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
|
|
|
|
CD->param_end());
|
2015-09-10 16:12:02 +08:00
|
|
|
|
|
|
|
// Create the function declaration.
|
|
|
|
const CGFunctionInfo &FuncInfo =
|
2017-08-09 02:04:06 +08:00
|
|
|
CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
|
2015-09-10 16:12:02 +08:00
|
|
|
llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
|
|
|
|
|
2018-04-14 01:31:06 +08:00
|
|
|
auto *F =
|
[OPENMP][DEBUG] Generate second function with correct arg types.
Currently, if some of the parameters are captured by value, the
argument is converted to uintptr_t type and thus we lose the debug
info about the real type of the argument (captured variable):
```
void @.outlined_function.(uintptr %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.outlined_function.(uintptr %a.casted.val)
...
```
To resolve this problem, in debug mode a special external wrapper
function is generated that calls the outlined function with the correct
parameter types:
```
void @.wrapper.(uintptr %par) {
%a = alloca i32
%cast = bitcast i32* %a to uintptr*
store uintptr %par, uintptr *%cast
%a.val = load i32, i32* %a
call void @.outlined_function.(i32 %a)
ret void
}
void @.outlined_function.(i32 %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.wrapper.(uintptr %a.casted.val)
...
```
llvm-svn: 306697
2017-06-30 00:43:05 +08:00
|
|
|
llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
|
|
|
|
FO.FunctionName, &CGM.getModule());
|
2015-09-10 16:12:02 +08:00
|
|
|
CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
|
|
|
|
if (CD->isNothrow())
|
2017-08-05 03:10:54 +08:00
|
|
|
F->setDoesNotThrow();
|
2018-04-11 04:10:53 +08:00
|
|
|
F->setDoesNotRecurse();
|
2015-09-10 16:12:02 +08:00
|
|
|
|
|
|
|
// Generate the function.
|
2017-08-15 00:03:47 +08:00
|
|
|
CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
|
2018-08-10 05:08:08 +08:00
|
|
|
FO.S->getBeginLoc(), CD->getBody()->getBeginLoc());
|
2015-09-10 16:12:02 +08:00
|
|
|
unsigned Cnt = CD->getContextParamPosition();
|
[OPENMP][DEBUG] Generate second function with correct arg types.
Currently, if some of the parameters are captured by value, the
argument is converted to uintptr_t type and thus we lose the debug
info about the real type of the argument (captured variable):
```
void @.outlined_function.(uintptr %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.outlined_function.(uintptr %a.casted.val)
...
```
To resolve this problem, in debug mode a special external wrapper
function is generated that calls the outlined function with the correct
parameter types:
```
void @.wrapper.(uintptr %par) {
%a = alloca i32
%cast = bitcast i32* %a to uintptr*
store uintptr %par, uintptr *%cast
%a.val = load i32, i32* %a
call void @.outlined_function.(i32 %a)
ret void
}
void @.outlined_function.(i32 %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.wrapper.(uintptr %a.casted.val)
...
```
llvm-svn: 306697
2017-06-30 00:43:05 +08:00
|
|
|
I = FO.S->captures().begin();
|
2018-04-14 01:31:06 +08:00
|
|
|
for (const FieldDecl *FD : RD->fields()) {
|
2017-08-09 02:04:06 +08:00
|
|
|
// Do not map arguments if we emit function with non-original types.
|
|
|
|
Address LocalAddr(Address::invalid());
|
|
|
|
if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
|
|
|
|
LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
|
|
|
|
TargetArgs[Cnt]);
|
|
|
|
} else {
|
|
|
|
LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
|
|
|
|
}
|
2015-12-03 01:44:43 +08:00
|
|
|
// If we are capturing a pointer by copy we don't need to do anything, just
|
|
|
|
// use the value that we get from the arguments.
|
|
|
|
if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
|
2016-07-28 06:49:49 +08:00
|
|
|
const VarDecl *CurVD = I->getCapturedVar();
|
[OPENMP][DEBUG] Generate second function with correct arg types.
Currently, if some of the parameters are captured by value, the
argument is converted to uintptr_t type and thus we lose the debug
info about the real type of the argument (captured variable):
```
void @.outlined_function.(uintptr %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.outlined_function.(uintptr %a.casted.val)
...
```
To resolve this problem, in debug mode a special external wrapper
function is generated that calls the outlined function with the correct
parameter types:
```
void @.wrapper.(uintptr %par) {
%a = alloca i32
%cast = bitcast i32* %a to uintptr*
store uintptr %par, uintptr *%cast
%a.val = load i32, i32* %a
call void @.outlined_function.(i32 %a)
ret void
}
void @.outlined_function.(i32 %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.wrapper.(uintptr %a.casted.val)
...
```
llvm-svn: 306697
2017-06-30 00:43:05 +08:00
|
|
|
if (!FO.RegisterCastedArgsOnly)
|
|
|
|
LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
|
2016-02-19 06:34:54 +08:00
|
|
|
++Cnt;
|
|
|
|
++I;
|
2015-12-03 01:44:43 +08:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2017-10-10 17:39:32 +08:00
|
|
|
LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
|
|
|
|
AlignmentSource::Decl);
|
2015-09-10 16:12:02 +08:00
|
|
|
if (FD->hasCapturedVLAType()) {
|
[OPENMP][DEBUG] Generate second function with correct arg types.
Currently, if some of the parameters are captured by value, the
argument is converted to uintptr_t type and thus we lose the debug
info about the real type of the argument (captured variable):
```
void @.outlined_function.(uintptr %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.outlined_function.(uintptr %a.casted.val)
...
```
To resolve this problem, in debug mode a special external wrapper
function is generated that calls the outlined function with the correct
parameter types:
```
void @.wrapper.(uintptr %par) {
%a = alloca i32
%cast = bitcast i32* %a to uintptr*
store uintptr %par, uintptr *%cast
%a.val = load i32, i32* %a
call void @.outlined_function.(i32 %a)
ret void
}
void @.outlined_function.(i32 %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.wrapper.(uintptr %a.casted.val)
...
```
llvm-svn: 306697
2017-06-30 00:43:05 +08:00
|
|
|
if (FO.UIntPtrCastRequired) {
|
2018-01-24 02:12:38 +08:00
|
|
|
ArgLVal = CGF.MakeAddrLValue(
|
|
|
|
castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
|
|
|
|
Args[Cnt]->getName(), ArgLVal),
|
|
|
|
FD->getType(), AlignmentSource::Decl);
|
[OPENMP][DEBUG] Generate second function with correct arg types.
Currently, if some of the parameters are captured by value, the
argument is converted to uintptr_t type and thus we lose the debug
info about the real type of the argument (captured variable):
```
void @.outlined_function.(uintptr %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.outlined_function.(uintptr %a.casted.val)
...
```
To resolve this problem, in debug mode a special external wrapper
function is generated that calls the outlined function with the correct
parameter types:
```
void @.wrapper.(uintptr %par) {
%a = alloca i32
%cast = bitcast i32* %a to uintptr*
store uintptr %par, uintptr *%cast
%a.val = load i32, i32* %a
call void @.outlined_function.(i32 %a)
ret void
}
void @.outlined_function.(i32 %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.wrapper.(uintptr %a.casted.val)
...
```
llvm-svn: 306697
2017-06-30 00:43:05 +08:00
|
|
|
}
|
2018-04-14 01:31:06 +08:00
|
|
|
llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
|
|
|
|
const VariableArrayType *VAT = FD->getCapturedVLAType();
|
|
|
|
VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
|
2015-09-10 16:12:02 +08:00
|
|
|
} else if (I->capturesVariable()) {
|
2018-04-14 01:31:06 +08:00
|
|
|
const VarDecl *Var = I->getCapturedVar();
|
2015-09-10 16:12:02 +08:00
|
|
|
QualType VarTy = Var->getType();
|
|
|
|
Address ArgAddr = ArgLVal.getAddress();
|
2019-05-24 02:19:54 +08:00
|
|
|
if (ArgLVal.getType()->isLValueReferenceType()) {
|
|
|
|
ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
|
|
|
|
} else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
|
|
|
|
assert(ArgLVal.getType()->isPointerType());
|
|
|
|
ArgAddr = CGF.EmitLoadOfPointer(
|
|
|
|
ArgAddr, ArgLVal.getType()->castAs<PointerType>());
|
2015-09-10 16:12:02 +08:00
|
|
|
}
|
[OPENMP][DEBUG] Generate second function with correct arg types.
Currently, if some of the parameters are captured by value, the
argument is converted to uintptr_t type and thus we lose the debug
info about the real type of the argument (captured variable):
```
void @.outlined_function.(uintptr %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.outlined_function.(uintptr %a.casted.val)
...
```
To resolve this problem, in debug mode a special external wrapper
function is generated that calls the outlined function with the correct
parameter types:
```
void @.wrapper.(uintptr %par) {
%a = alloca i32
%cast = bitcast i32* %a to uintptr*
store uintptr %par, uintptr *%cast
%a.val = load i32, i32* %a
call void @.outlined_function.(i32 %a)
ret void
}
void @.outlined_function.(i32 %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.wrapper.(uintptr %a.casted.val)
...
```
llvm-svn: 306697
2017-06-30 00:43:05 +08:00
|
|
|
if (!FO.RegisterCastedArgsOnly) {
|
|
|
|
LocalAddrs.insert(
|
|
|
|
{Args[Cnt],
|
|
|
|
{Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
|
|
|
|
}
|
2015-12-03 01:44:43 +08:00
|
|
|
} else if (I->capturesVariableByCopy()) {
|
|
|
|
assert(!FD->getType()->isAnyPointerType() &&
|
|
|
|
"Not expecting a captured pointer.");
|
2018-04-14 01:31:06 +08:00
|
|
|
const VarDecl *Var = I->getCapturedVar();
|
2019-05-24 02:19:54 +08:00
|
|
|
LocalAddrs.insert({Args[Cnt],
|
|
|
|
{Var, FO.UIntPtrCastRequired
|
|
|
|
? castValueFromUintptr(
|
|
|
|
CGF, I->getLocation(), FD->getType(),
|
|
|
|
Args[Cnt]->getName(), ArgLVal)
|
|
|
|
: ArgLVal.getAddress()}});
|
2015-09-10 16:12:02 +08:00
|
|
|
} else {
|
|
|
|
// If 'this' is captured, load it into CXXThisValue.
|
|
|
|
assert(I->capturesThis());
|
2018-01-24 02:44:14 +08:00
|
|
|
CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
|
[OPENMP][DEBUG] Generate second function with correct arg types.
Currently, if some of the parameters are captured by value, the
argument is converted to uintptr_t type and thus we lose the debug
info about the real type of the argument (captured variable):
```
void @.outlined_function.(uintptr %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.outlined_function.(uintptr %a.casted.val)
...
```
To resolve this problem, in debug mode a special external wrapper
function is generated that calls the outlined function with the correct
parameter types:
```
void @.wrapper.(uintptr %par) {
%a = alloca i32
%cast = bitcast i32* %a to uintptr*
store uintptr %par, uintptr *%cast
%a.val = load i32, i32* %a
call void @.outlined_function.(i32 %a)
ret void
}
void @.outlined_function.(i32 %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.wrapper.(uintptr %a.casted.val)
...
```
llvm-svn: 306697
2017-06-30 00:43:05 +08:00
|
|
|
LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
|
2015-09-10 16:12:02 +08:00
|
|
|
}
|
2016-02-19 06:34:54 +08:00
|
|
|
++Cnt;
|
|
|
|
++I;
|
2015-09-10 16:12:02 +08:00
|
|
|
}
|
|
|
|
|
2017-08-10 03:38:53 +08:00
|
|
|
return F;
|
[OPENMP][DEBUG] Generate second function with correct arg types.
Currently, if some of the parameters are captured by value, the
argument is converted to uintptr_t type and thus we lose the debug
info about the real type of the argument (captured variable):
```
void @.outlined_function.(uintptr %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.outlined_function.(uintptr %a.casted.val)
...
```
To resolve this problem, in debug mode a special external wrapper
function is generated that calls the outlined function with the correct
parameter types:
```
void @.wrapper.(uintptr %par) {
%a = alloca i32
%cast = bitcast i32* %a to uintptr*
store uintptr %par, uintptr *%cast
%a.val = load i32, i32* %a
call void @.outlined_function.(i32 %a)
ret void
}
void @.outlined_function.(i32 %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.wrapper.(uintptr %a.casted.val)
...
```
llvm-svn: 306697
2017-06-30 00:43:05 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
llvm::Function *
|
|
|
|
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
|
|
|
|
assert(
|
|
|
|
CapturedStmtInfo &&
|
|
|
|
"CapturedStmtInfo should be set when generating the captured function");
|
|
|
|
const CapturedDecl *CD = S.getCapturedDecl();
|
|
|
|
// Build the argument list.
|
|
|
|
bool NeedWrapperFunction =
|
|
|
|
getDebugInfo() &&
|
|
|
|
CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo;
|
|
|
|
FunctionArgList Args;
|
2017-08-09 02:04:06 +08:00
|
|
|
llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
|
[OPENMP][DEBUG] Generate second function with correct arg types.
Currently, if some of the parameters are captured by value, the
argument is converted to uintptr_t type and thus we lose the debug
info about the real type of the argument (captured variable):
```
void @.outlined_function.(uintptr %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.outlined_function.(uintptr %a.casted.val)
...
```
To resolve this problem, in debug mode a special external wrapper
function is generated that calls the outlined function with the correct
parameter types:
```
void @.wrapper.(uintptr %par) {
%a = alloca i32
%cast = bitcast i32* %a to uintptr*
store uintptr %par, uintptr *%cast
%a.val = load i32, i32* %a
call void @.outlined_function.(i32 %a)
ret void
}
void @.outlined_function.(i32 %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.wrapper.(uintptr %a.casted.val)
...
```
llvm-svn: 306697
2017-06-30 00:43:05 +08:00
|
|
|
llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
|
2017-08-10 03:38:53 +08:00
|
|
|
SmallString<256> Buffer;
|
|
|
|
llvm::raw_svector_ostream Out(Buffer);
|
|
|
|
Out << CapturedStmtInfo->getHelperName();
|
|
|
|
if (NeedWrapperFunction)
|
|
|
|
Out << "_debug__";
|
2017-08-09 00:45:36 +08:00
|
|
|
FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
|
2017-08-10 03:38:53 +08:00
|
|
|
Out.str());
|
|
|
|
llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
|
|
|
|
VLASizes, CXXThisValue, FO);
|
2019-05-24 02:19:54 +08:00
|
|
|
CodeGenFunction::OMPPrivateScope LocalScope(*this);
|
[OPENMP][DEBUG] Generate second function with correct arg types.
Currently, if some of the parameters are captured by value, the
argument is converted to uintptr_t type and thus we lose the debug
info about the real type of the argument (captured variable):
```
void @.outlined_function.(uintptr %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.outlined_function.(uintptr %a.casted.val)
...
```
To resolve this problem, in debug mode a special external wrapper
function is generated that calls the outlined function with the correct
parameter types:
```
void @.wrapper.(uintptr %par) {
%a = alloca i32
%cast = bitcast i32* %a to uintptr*
store uintptr %par, uintptr *%cast
%a.val = load i32, i32* %a
call void @.outlined_function.(i32 %a)
ret void
}
void @.outlined_function.(i32 %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.wrapper.(uintptr %a.casted.val)
...
```
llvm-svn: 306697
2017-06-30 00:43:05 +08:00
|
|
|
for (const auto &LocalAddrPair : LocalAddrs) {
|
|
|
|
if (LocalAddrPair.second.first) {
|
2019-05-24 02:19:54 +08:00
|
|
|
LocalScope.addPrivate(LocalAddrPair.second.first, [&LocalAddrPair]() {
|
|
|
|
return LocalAddrPair.second.second;
|
|
|
|
});
|
[OPENMP][DEBUG] Generate second function with correct arg types.
Currently, if some of the parameters are captured by value, the
argument is converted to uintptr_t type and thus we lose the debug
info about the real type of the argument (captured variable):
```
void @.outlined_function.(uintptr %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.outlined_function.(uintptr %a.casted.val)
...
```
To resolve this problem, in debug mode a special external wrapper
function is generated that calls the outlined function with the correct
parameter types:
```
void @.wrapper.(uintptr %par) {
%a = alloca i32
%cast = bitcast i32* %a to uintptr*
store uintptr %par, uintptr *%cast
%a.val = load i32, i32* %a
call void @.outlined_function.(i32 %a)
ret void
}
void @.outlined_function.(i32 %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.wrapper.(uintptr %a.casted.val)
...
```
llvm-svn: 306697
2017-06-30 00:43:05 +08:00
|
|
|
}
|
|
|
|
}
|
2019-05-24 02:19:54 +08:00
|
|
|
(void)LocalScope.Privatize();
|
[OPENMP][DEBUG] Generate second function with correct arg types.
Currently, if some of the parameters are captured by value, the
argument is converted to uintptr_t type and thus we lose the debug
info about the real type of the argument (captured variable):
```
void @.outlined_function.(uintptr %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.outlined_function.(uintptr %a.casted.val)
...
```
To resolve this problem, in debug mode a special external wrapper
function is generated that calls the outlined function with the correct
parameter types:
```
void @.wrapper.(uintptr %par) {
%a = alloca i32
%cast = bitcast i32* %a to uintptr*
store uintptr %par, uintptr *%cast
%a.val = load i32, i32* %a
call void @.outlined_function.(i32 %a)
ret void
}
void @.outlined_function.(i32 %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.wrapper.(uintptr %a.casted.val)
...
```
llvm-svn: 306697
2017-06-30 00:43:05 +08:00
|
|
|
for (const auto &VLASizePair : VLASizes)
|
|
|
|
VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
|
2015-12-06 22:32:39 +08:00
|
|
|
PGO.assignRegionCounters(GlobalDecl(CD), F);
|
2015-09-10 16:12:02 +08:00
|
|
|
CapturedStmtInfo->EmitBody(*this, CD->getBody());
|
2019-05-24 02:19:54 +08:00
|
|
|
(void)LocalScope.ForceCleanup();
|
2015-09-10 16:12:02 +08:00
|
|
|
FinishFunction(CD->getBodyRBrace());
|
2017-08-10 03:38:53 +08:00
|
|
|
if (!NeedWrapperFunction)
|
[OPENMP][DEBUG] Generate second function with correct arg types.
Currently, if some of the parameters are captured by value, the
argument is converted to uintptr_t type and thus we lose the debug
info about the real type of the argument (captured variable):
```
void @.outlined_function.(uintptr %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.outlined_function.(uintptr %a.casted.val)
...
```
To resolve this problem, in debug mode a special external wrapper
function is generated that calls the outlined function with the correct
parameter types:
```
void @.wrapper.(uintptr %par) {
%a = alloca i32
%cast = bitcast i32* %a to uintptr*
store uintptr %par, uintptr *%cast
%a.val = load i32, i32* %a
call void @.outlined_function.(i32 %a)
ret void
}
void @.outlined_function.(i32 %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.wrapper.(uintptr %a.casted.val)
...
```
llvm-svn: 306697
2017-06-30 00:43:05 +08:00
|
|
|
return F;
|
|
|
|
|
2017-08-05 05:26:25 +08:00
|
|
|
FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
|
2017-08-10 03:38:53 +08:00
|
|
|
/*RegisterCastedArgsOnly=*/true,
|
|
|
|
CapturedStmtInfo->getHelperName());
|
[OPENMP][DEBUG] Generate second function with correct arg types.
Currently, if some of the parameters are captured by value, the
argument is converted to the uintptr_t type, and thus we lose the debug
info about the real type of the argument (captured variable):
```
void @.outlined_function.(uintptr %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.outlined_function.(uintptr %a.casted.val)
...
```
To resolve this problem, in debug mode a special external wrapper
function is generated that calls the outlined function with the correct
parameter types:
```
void @.wrapper.(uintptr %par) {
%a = alloca i32
%cast = bitcast i32* %a to uintptr*
store uintptr %par, uintptr *%cast
%a.val = load i32, i32* %a
call void @.outlined_function.(i32 %a)
ret void
}
void @.outlined_function.(i32 %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.wrapper.(uintptr %a.casted.val)
...
```
llvm-svn: 306697
2017-06-30 00:43:05 +08:00
|
|
|
CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
|
2018-03-14 22:17:45 +08:00
|
|
|
WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
|
[OPENMP][DEBUG] Generate second function with correct arg types.
Currently, if some of the parameters are captured by value, the
argument is converted to the uintptr_t type, and thus we lose the debug
info about the real type of the argument (captured variable):
```
void @.outlined_function.(uintptr %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.outlined_function.(uintptr %a.casted.val)
...
```
To resolve this problem, in debug mode a special external wrapper
function is generated that calls the outlined function with the correct
parameter types:
```
void @.wrapper.(uintptr %par) {
%a = alloca i32
%cast = bitcast i32* %a to uintptr*
store uintptr %par, uintptr *%cast
%a.val = load i32, i32* %a
call void @.outlined_function.(i32 %a)
ret void
}
void @.outlined_function.(i32 %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.wrapper.(uintptr %a.casted.val)
...
```
llvm-svn: 306697
2017-06-30 00:43:05 +08:00
|
|
|
Args.clear();
|
|
|
|
LocalAddrs.clear();
|
|
|
|
VLASizes.clear();
|
|
|
|
llvm::Function *WrapperF =
|
|
|
|
emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
|
2017-08-10 03:38:53 +08:00
|
|
|
WrapperCGF.CXXThisValue, WrapperFO);
|
[OPENMP][DEBUG] Generate second function with correct arg types.
Currently, if some of the parameters are captured by value, the
argument is converted to the uintptr_t type, and thus we lose the debug
info about the real type of the argument (captured variable):
```
void @.outlined_function.(uintptr %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.outlined_function.(uintptr %a.casted.val)
...
```
To resolve this problem, in debug mode a special external wrapper
function is generated that calls the outlined function with the correct
parameter types:
```
void @.wrapper.(uintptr %par) {
%a = alloca i32
%cast = bitcast i32* %a to uintptr*
store uintptr %par, uintptr *%cast
%a.val = load i32, i32* %a
call void @.outlined_function.(i32 %a)
ret void
}
void @.outlined_function.(i32 %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.wrapper.(uintptr %a.casted.val)
...
```
llvm-svn: 306697
2017-06-30 00:43:05 +08:00
|
|
|
llvm::SmallVector<llvm::Value *, 4> CallArgs;
|
|
|
|
for (const auto *Arg : Args) {
|
|
|
|
llvm::Value *CallArg;
|
|
|
|
auto I = LocalAddrs.find(Arg);
|
|
|
|
if (I != LocalAddrs.end()) {
|
2017-10-18 00:47:34 +08:00
|
|
|
LValue LV = WrapperCGF.MakeAddrLValue(
|
|
|
|
I->second.second,
|
|
|
|
I->second.first ? I->second.first->getType() : Arg->getType(),
|
|
|
|
AlignmentSource::Decl);
|
2018-08-10 05:08:08 +08:00
|
|
|
CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
|
[OPENMP][DEBUG] Generate second function with correct arg types.
Currently, if some of the parameters are captured by value, the
argument is converted to the uintptr_t type, and thus we lose the debug
info about the real type of the argument (captured variable):
```
void @.outlined_function.(uintptr %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.outlined_function.(uintptr %a.casted.val)
...
```
To resolve this problem, in debug mode a special external wrapper
function is generated that calls the outlined function with the correct
parameter types:
```
void @.wrapper.(uintptr %par) {
%a = alloca i32
%cast = bitcast i32* %a to uintptr*
store uintptr %par, uintptr *%cast
%a.val = load i32, i32* %a
call void @.outlined_function.(i32 %a)
ret void
}
void @.outlined_function.(i32 %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.wrapper.(uintptr %a.casted.val)
...
```
llvm-svn: 306697
2017-06-30 00:43:05 +08:00
|
|
|
} else {
|
|
|
|
auto EI = VLASizes.find(Arg);
|
2018-04-14 01:31:06 +08:00
|
|
|
if (EI != VLASizes.end()) {
|
[OPENMP][DEBUG] Generate second function with correct arg types.
Currently, if some of the parameters are captured by value, the
argument is converted to the uintptr_t type, and thus we lose the debug
info about the real type of the argument (captured variable):
```
void @.outlined_function.(uintptr %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.outlined_function.(uintptr %a.casted.val)
...
```
To resolve this problem, in debug mode a special external wrapper
function is generated that calls the outlined function with the correct
parameter types:
```
void @.wrapper.(uintptr %par) {
%a = alloca i32
%cast = bitcast i32* %a to uintptr*
store uintptr %par, uintptr *%cast
%a.val = load i32, i32* %a
call void @.outlined_function.(i32 %a)
ret void
}
void @.outlined_function.(i32 %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.wrapper.(uintptr %a.casted.val)
...
```
llvm-svn: 306697
2017-06-30 00:43:05 +08:00
|
|
|
CallArg = EI->second.second;
|
2018-04-14 01:31:06 +08:00
|
|
|
} else {
|
[OPENMP][DEBUG] Generate second function with correct arg types.
Currently, if some of the parameters are captured by value, the
argument is converted to the uintptr_t type, and thus we lose the debug
info about the real type of the argument (captured variable):
```
void @.outlined_function.(uintptr %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.outlined_function.(uintptr %a.casted.val)
...
```
To resolve this problem, in debug mode a special external wrapper
function is generated that calls the outlined function with the correct
parameter types:
```
void @.wrapper.(uintptr %par) {
%a = alloca i32
%cast = bitcast i32* %a to uintptr*
store uintptr %par, uintptr *%cast
%a.val = load i32, i32* %a
call void @.outlined_function.(i32 %a)
ret void
}
void @.outlined_function.(i32 %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.wrapper.(uintptr %a.casted.val)
...
```
llvm-svn: 306697
2017-06-30 00:43:05 +08:00
|
|
|
LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
|
2017-10-10 17:39:32 +08:00
|
|
|
Arg->getType(),
|
|
|
|
AlignmentSource::Decl);
|
2018-08-10 05:08:08 +08:00
|
|
|
CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
|
[OPENMP][DEBUG] Generate second function with correct arg types.
Currently, if some of the parameters are captured by value, the
argument is converted to the uintptr_t type, and thus we lose the debug
info about the real type of the argument (captured variable):
```
void @.outlined_function.(uintptr %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.outlined_function.(uintptr %a.casted.val)
...
```
To resolve this problem, in debug mode a special external wrapper
function is generated that calls the outlined function with the correct
parameter types:
```
void @.wrapper.(uintptr %par) {
%a = alloca i32
%cast = bitcast i32* %a to uintptr*
store uintptr %par, uintptr *%cast
%a.val = load i32, i32* %a
call void @.outlined_function.(i32 %a)
ret void
}
void @.outlined_function.(i32 %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.wrapper.(uintptr %a.casted.val)
...
```
llvm-svn: 306697
2017-06-30 00:43:05 +08:00
|
|
|
}
|
|
|
|
}
|
2017-10-18 00:47:34 +08:00
|
|
|
CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
|
[OPENMP][DEBUG] Generate second function with correct arg types.
Currently, if some of the parameters are captured by value, the
argument is converted to the uintptr_t type, and thus we lose the debug
info about the real type of the argument (captured variable):
```
void @.outlined_function.(uintptr %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.outlined_function.(uintptr %a.casted.val)
...
```
To resolve this problem, in debug mode a special external wrapper
function is generated that calls the outlined function with the correct
parameter types:
```
void @.wrapper.(uintptr %par) {
%a = alloca i32
%cast = bitcast i32* %a to uintptr*
store uintptr %par, uintptr *%cast
%a.val = load i32, i32* %a
call void @.outlined_function.(i32 %a)
ret void
}
void @.outlined_function.(i32 %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.wrapper.(uintptr %a.casted.val)
...
```
llvm-svn: 306697
2017-06-30 00:43:05 +08:00
|
|
|
}
|
2018-08-10 05:08:08 +08:00
|
|
|
CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getBeginLoc(),
|
2017-08-14 23:01:03 +08:00
|
|
|
F, CallArgs);
|
[OPENMP][DEBUG] Generate second function with correct arg types.
Currently, if some of the parameters are captured by value, the
argument is converted to the uintptr_t type, and thus we lose the debug
info about the real type of the argument (captured variable):
```
void @.outlined_function.(uintptr %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.outlined_function.(uintptr %a.casted.val)
...
```
To resolve this problem, in debug mode a special external wrapper
function is generated that calls the outlined function with the correct
parameter types:
```
void @.wrapper.(uintptr %par) {
%a = alloca i32
%cast = bitcast i32* %a to uintptr*
store uintptr %par, uintptr *%cast
%a.val = load i32, i32* %a
call void @.outlined_function.(i32 %a)
ret void
}
void @.outlined_function.(i32 %par);
...
%a = alloca i32
%a.casted = alloca uintptr
%cast = bitcast uintptr* %a.casted to i32*
%a.val = load i32, i32 *%a
store i32 %a.val, i32 *%cast
%a.casted.val = load uintptr, uintptr* %a.casted
call void @.wrapper.(uintptr %a.casted.val)
...
```
llvm-svn: 306697
2017-06-30 00:43:05 +08:00
|
|
|
WrapperCGF.FinishFunction();
|
|
|
|
return WrapperF;
|
2015-09-10 16:12:02 +08:00
|
|
|
}
|
|
|
|
|
2014-05-06 18:08:46 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// OpenMP Directive Emission
|
|
|
|
//===----------------------------------------------------------------------===//
|
2015-04-14 13:11:24 +08:00
|
|
|
void CodeGenFunction::EmitOMPAggregateAssign(
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address DestAddr, Address SrcAddr, QualType OriginalType,
|
2018-04-14 01:31:06 +08:00
|
|
|
const llvm::function_ref<void(Address, Address)> CopyGen) {
|
2015-04-14 13:11:24 +08:00
|
|
|
// Perform element-by-element initialization.
|
|
|
|
QualType ElementTy;
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
|
|
|
|
// Drill down to the base element type on both arrays.
|
2018-04-14 01:31:06 +08:00
|
|
|
const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
|
|
|
|
llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
|
|
|
|
|
2018-04-14 01:31:06 +08:00
|
|
|
llvm::Value *SrcBegin = SrcAddr.getPointer();
|
|
|
|
llvm::Value *DestBegin = DestAddr.getPointer();
|
2015-04-14 13:11:24 +08:00
|
|
|
// Cast from pointer to array type to pointer to single element.
|
2018-04-14 01:31:06 +08:00
|
|
|
llvm::Value *DestEnd = Builder.CreateGEP(DestBegin, NumElements);
|
2015-04-14 13:11:24 +08:00
|
|
|
// The basic structure here is a while-do loop.
|
2018-04-14 01:31:06 +08:00
|
|
|
llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
|
|
|
|
llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
|
|
|
|
llvm::Value *IsEmpty =
|
2015-04-14 13:11:24 +08:00
|
|
|
Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
|
|
|
|
Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
|
|
|
|
|
|
|
|
// Enter the loop body, making that address the current address.
|
2018-04-14 01:31:06 +08:00
|
|
|
llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
|
2015-04-14 13:11:24 +08:00
|
|
|
EmitBlock(BodyBB);
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
|
|
|
|
CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);
|
|
|
|
|
|
|
|
llvm::PHINode *SrcElementPHI =
|
|
|
|
Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
|
|
|
|
SrcElementPHI->addIncoming(SrcBegin, EntryBB);
|
|
|
|
Address SrcElementCurrent =
|
|
|
|
Address(SrcElementPHI,
|
|
|
|
SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
|
|
|
|
|
|
|
|
llvm::PHINode *DestElementPHI =
|
|
|
|
Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
|
|
|
|
DestElementPHI->addIncoming(DestBegin, EntryBB);
|
|
|
|
Address DestElementCurrent =
|
|
|
|
Address(DestElementPHI,
|
|
|
|
DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
|
2015-04-14 13:11:24 +08:00
|
|
|
|
|
|
|
// Emit copy.
|
|
|
|
CopyGen(DestElementCurrent, SrcElementCurrent);
|
|
|
|
|
|
|
|
// Shift the address forward by one element.
|
2018-04-14 01:31:06 +08:00
|
|
|
llvm::Value *DestElementNext = Builder.CreateConstGEP1_32(
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
|
2018-04-14 01:31:06 +08:00
|
|
|
llvm::Value *SrcElementNext = Builder.CreateConstGEP1_32(
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
|
2015-04-14 13:11:24 +08:00
|
|
|
// Check whether we've reached the end.
|
2018-04-14 01:31:06 +08:00
|
|
|
llvm::Value *Done =
|
2015-04-14 13:11:24 +08:00
|
|
|
Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
|
|
|
|
Builder.CreateCondBr(Done, DoneBB, BodyBB);
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
|
|
|
|
SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());
|
2015-04-14 13:11:24 +08:00
|
|
|
|
|
|
|
// Done.
|
|
|
|
EmitBlock(DoneBB, /*IsFinished=*/true);
|
|
|
|
}
|
2014-10-08 22:01:46 +08:00
|
|
|
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
|
|
|
|
Address SrcAddr, const VarDecl *DestVD,
|
2015-04-14 13:11:24 +08:00
|
|
|
const VarDecl *SrcVD, const Expr *Copy) {
|
|
|
|
if (OriginalType->isArrayType()) {
|
2018-04-14 01:31:06 +08:00
|
|
|
const auto *BO = dyn_cast<BinaryOperator>(Copy);
|
2015-04-14 13:11:24 +08:00
|
|
|
if (BO && BO->getOpcode() == BO_Assign) {
|
|
|
|
// Perform simple memcpy for simple copying.
|
2018-01-25 22:21:55 +08:00
|
|
|
LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
|
|
|
|
LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
|
|
|
|
EmitAggregateAssign(Dest, Src, OriginalType);
|
2015-04-14 13:11:24 +08:00
|
|
|
} else {
|
|
|
|
// For arrays with complex element types perform element by element
|
|
|
|
// copying.
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
EmitOMPAggregateAssign(
|
2015-04-14 13:11:24 +08:00
|
|
|
DestAddr, SrcAddr, OriginalType,
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
[this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
|
2015-04-14 13:11:24 +08:00
|
|
|
// Working with the single array element, so have to remap
|
|
|
|
// destination and source variables to corresponding array
|
|
|
|
// elements.
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CodeGenFunction::OMPPrivateScope Remap(*this);
|
2018-04-14 01:31:06 +08:00
|
|
|
Remap.addPrivate(DestVD, [DestElement]() { return DestElement; });
|
|
|
|
Remap.addPrivate(SrcVD, [SrcElement]() { return SrcElement; });
|
2015-04-14 13:11:24 +08:00
|
|
|
(void)Remap.Privatize();
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
EmitIgnoredExpr(Copy);
|
2015-04-14 13:11:24 +08:00
|
|
|
});
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
// Remap pseudo source variable to private copy.
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CodeGenFunction::OMPPrivateScope Remap(*this);
|
2018-04-14 01:31:06 +08:00
|
|
|
Remap.addPrivate(SrcVD, [SrcAddr]() { return SrcAddr; });
|
|
|
|
Remap.addPrivate(DestVD, [DestAddr]() { return DestAddr; });
|
2015-04-14 13:11:24 +08:00
|
|
|
(void)Remap.Privatize();
|
|
|
|
// Emit copying of the whole variable.
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
EmitIgnoredExpr(Copy);
|
2014-10-08 22:01:46 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-04-15 12:52:20 +08:00
|
|
|
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
|
|
|
|
OMPPrivateScope &PrivateScope) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!HaveInsertPoint())
|
|
|
|
return false;
|
2019-03-06 01:47:18 +08:00
|
|
|
bool DeviceConstTarget =
|
|
|
|
getLangOpts().OpenMPIsDevice &&
|
|
|
|
isOpenMPTargetExecutionDirective(D.getDirectiveKind());
|
2016-02-15 16:07:17 +08:00
|
|
|
bool FirstprivateIsLastprivate = false;
|
|
|
|
llvm::DenseSet<const VarDecl *> Lastprivates;
|
|
|
|
for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
|
|
|
|
for (const auto *D : C->varlists())
|
|
|
|
Lastprivates.insert(
|
|
|
|
cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
|
|
|
|
}
|
2015-04-15 12:52:20 +08:00
|
|
|
llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
|
2018-01-13 03:39:11 +08:00
|
|
|
llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
|
|
|
|
getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
|
|
|
|
// Force emission of the firstprivate copy if the directive does not emit
|
|
|
|
// outlined function, like omp for, omp simd, omp distribute etc.
|
|
|
|
bool MustEmitFirstprivateCopy =
|
|
|
|
CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
|
2015-08-30 23:12:28 +08:00
|
|
|
for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
|
2014-10-08 22:01:46 +08:00
|
|
|
auto IRef = C->varlist_begin();
|
|
|
|
auto InitsRef = C->inits().begin();
|
2018-04-14 01:31:06 +08:00
|
|
|
for (const Expr *IInit : C->private_copies()) {
|
|
|
|
const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
|
2016-05-17 16:55:33 +08:00
|
|
|
bool ThisFirstprivateIsLastprivate =
|
|
|
|
Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
|
2018-04-14 01:31:06 +08:00
|
|
|
const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
|
2019-04-04 01:57:06 +08:00
|
|
|
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
|
2018-01-13 03:39:11 +08:00
|
|
|
if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
|
2019-04-04 01:57:06 +08:00
|
|
|
!FD->getType()->isReferenceType() &&
|
|
|
|
(!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
|
2016-05-17 16:55:33 +08:00
|
|
|
EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
|
|
|
|
++IRef;
|
|
|
|
++InitsRef;
|
|
|
|
continue;
|
2019-03-06 01:47:18 +08:00
|
|
|
}
|
|
|
|
// Do not emit copy for firstprivate constant variables in target regions,
|
|
|
|
// captured by reference.
|
|
|
|
if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
|
2019-04-04 01:57:06 +08:00
|
|
|
FD && FD->getType()->isReferenceType() &&
|
|
|
|
(!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
|
2019-03-06 01:47:18 +08:00
|
|
|
(void)CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(*this,
|
|
|
|
OrigVD);
|
|
|
|
++IRef;
|
|
|
|
++InitsRef;
|
|
|
|
continue;
|
2016-05-17 16:55:33 +08:00
|
|
|
}
|
2016-02-15 16:07:17 +08:00
|
|
|
FirstprivateIsLastprivate =
|
2016-05-17 16:55:33 +08:00
|
|
|
FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
|
2016-02-15 16:07:17 +08:00
|
|
|
if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
|
2018-04-14 01:31:06 +08:00
|
|
|
const auto *VDInit =
|
|
|
|
cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
|
2015-04-15 12:52:20 +08:00
|
|
|
bool IsRegistered;
|
2018-12-21 22:10:18 +08:00
|
|
|
DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
|
2016-05-17 16:55:33 +08:00
|
|
|
/*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
|
|
|
|
(*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
|
2019-05-24 06:30:43 +08:00
|
|
|
LValue OriginalLVal;
|
|
|
|
if (!FD) {
|
|
|
|
// Check if the firstprivate variable is just a constant value.
|
|
|
|
ConstantEmission CE = tryEmitAsConstant(&DRE);
|
|
|
|
if (CE && !CE.isReference()) {
|
|
|
|
// Constant value, no need to create a copy.
|
|
|
|
++IRef;
|
|
|
|
++InitsRef;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (CE && CE.isReference()) {
|
|
|
|
OriginalLVal = CE.getReferenceLValue(*this, &DRE);
|
|
|
|
} else {
|
|
|
|
assert(!CE && "Expected non-constant firstprivate.");
|
|
|
|
OriginalLVal = EmitLValue(&DRE);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
OriginalLVal = EmitLValue(&DRE);
|
|
|
|
}
|
2016-04-22 17:05:03 +08:00
|
|
|
QualType Type = VD->getType();
|
2015-05-19 20:31:28 +08:00
|
|
|
if (Type->isArrayType()) {
|
2015-04-15 12:52:20 +08:00
|
|
|
// Emit VarDecl with copy init for arrays.
|
|
|
|
// Get the address of the original variable captured in current
|
|
|
|
// captured region.
|
2018-04-14 01:31:06 +08:00
|
|
|
IsRegistered = PrivateScope.addPrivate(
|
|
|
|
OrigVD, [this, VD, Type, OriginalLVal, VDInit]() {
|
|
|
|
AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
|
|
|
|
const Expr *Init = VD->getInit();
|
|
|
|
if (!isa<CXXConstructExpr>(Init) ||
|
|
|
|
isTrivialInitializer(Init)) {
|
|
|
|
// Perform simple memcpy.
|
|
|
|
LValue Dest =
|
|
|
|
MakeAddrLValue(Emission.getAllocatedAddress(), Type);
|
|
|
|
EmitAggregateAssign(Dest, OriginalLVal, Type);
|
|
|
|
} else {
|
|
|
|
EmitOMPAggregateAssign(
|
|
|
|
Emission.getAllocatedAddress(), OriginalLVal.getAddress(),
|
|
|
|
Type,
|
|
|
|
[this, VDInit, Init](Address DestElement,
|
|
|
|
Address SrcElement) {
|
|
|
|
// Clean up any temporaries needed by the
|
|
|
|
// initialization.
|
|
|
|
RunCleanupsScope InitScope(*this);
|
|
|
|
// Emit initialization for single element.
|
|
|
|
setAddrOfLocalVar(VDInit, SrcElement);
|
|
|
|
EmitAnyExprToMem(Init, DestElement,
|
|
|
|
Init->getType().getQualifiers(),
|
|
|
|
/*IsInitializer*/ false);
|
|
|
|
LocalDeclMap.erase(VDInit);
|
|
|
|
});
|
|
|
|
}
|
|
|
|
EmitAutoVarCleanups(Emission);
|
|
|
|
return Emission.getAllocatedAddress();
|
|
|
|
});
|
2015-04-15 12:52:20 +08:00
|
|
|
} else {
|
2018-04-14 01:31:06 +08:00
|
|
|
Address OriginalAddr = OriginalLVal.getAddress();
|
|
|
|
IsRegistered = PrivateScope.addPrivate(
|
|
|
|
OrigVD, [this, VDInit, OriginalAddr, VD]() {
|
|
|
|
// Emit private VarDecl with copy init.
|
|
|
|
// Remap temp VDInit variable to the address of the original
|
|
|
|
// variable (for proper handling of captured global variables).
|
|
|
|
setAddrOfLocalVar(VDInit, OriginalAddr);
|
|
|
|
EmitDecl(*VD);
|
|
|
|
LocalDeclMap.erase(VDInit);
|
|
|
|
return GetAddrOfLocalVar(VD);
|
|
|
|
});
|
2015-04-15 12:52:20 +08:00
|
|
|
}
|
|
|
|
assert(IsRegistered &&
|
|
|
|
"firstprivate var already registered as private");
|
|
|
|
// Silence the warning about unused variable.
|
|
|
|
(void)IsRegistered;
|
|
|
|
}
|
2016-02-19 06:34:54 +08:00
|
|
|
++IRef;
|
|
|
|
++InitsRef;
|
2014-10-08 22:01:46 +08:00
|
|
|
}
|
|
|
|
}
|
2016-02-15 16:07:17 +08:00
|
|
|
return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
|
2014-10-08 22:01:46 +08:00
|
|
|
}
|
|
|
|
|
2014-10-21 11:16:40 +08:00
|
|
|
void CodeGenFunction::EmitOMPPrivateClause(
|
|
|
|
const OMPExecutableDirective &D,
|
|
|
|
CodeGenFunction::OMPPrivateScope &PrivateScope) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!HaveInsertPoint())
|
|
|
|
return;
|
2015-04-22 20:24:45 +08:00
|
|
|
llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
|
2015-08-30 23:12:28 +08:00
|
|
|
for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
|
2014-10-21 11:16:40 +08:00
|
|
|
auto IRef = C->varlist_begin();
|
2018-04-14 01:31:06 +08:00
|
|
|
for (const Expr *IInit : C->private_copies()) {
|
|
|
|
const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
|
2015-04-22 20:24:45 +08:00
|
|
|
if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
|
2018-04-14 01:31:06 +08:00
|
|
|
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
|
|
|
|
bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() {
|
|
|
|
// Emit private VarDecl with copy init.
|
|
|
|
EmitDecl(*VD);
|
|
|
|
return GetAddrOfLocalVar(VD);
|
|
|
|
});
|
2015-04-22 20:24:45 +08:00
|
|
|
assert(IsRegistered && "private var already registered as private");
|
|
|
|
// Silence the warning about unused variable.
|
|
|
|
(void)IsRegistered;
|
|
|
|
}
|
2014-10-21 11:16:40 +08:00
|
|
|
++IRef;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-04-16 13:39:01 +08:00
|
|
|
bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!HaveInsertPoint())
|
|
|
|
return false;
|
2015-04-16 13:39:01 +08:00
|
|
|
// threadprivate_var1 = master_threadprivate_var1;
|
|
|
|
// operator=(threadprivate_var2, master_threadprivate_var2);
|
|
|
|
// ...
|
|
|
|
// __kmpc_barrier(&loc, global_tid);
|
|
|
|
llvm::DenseSet<const VarDecl *> CopiedVars;
|
|
|
|
llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
|
2015-08-30 23:12:28 +08:00
|
|
|
for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
|
2015-04-16 13:39:01 +08:00
|
|
|
auto IRef = C->varlist_begin();
|
|
|
|
auto ISrcRef = C->source_exprs().begin();
|
|
|
|
auto IDestRef = C->destination_exprs().begin();
|
2018-04-14 01:31:06 +08:00
|
|
|
for (const Expr *AssignOp : C->assignment_ops()) {
|
|
|
|
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
|
2015-05-19 20:31:28 +08:00
|
|
|
QualType Type = VD->getType();
|
2015-04-16 13:39:01 +08:00
|
|
|
if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
|
2015-07-28 00:38:06 +08:00
|
|
|
// Get the address of the master variable. If we are emitting code with
|
|
|
|
// TLS support, the address is passed from the master as field in the
|
|
|
|
// captured declaration.
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address MasterAddr = Address::invalid();
|
2015-07-28 00:38:06 +08:00
|
|
|
if (getLangOpts().OpenMPUseTLS &&
|
|
|
|
getContext().getTargetInfo().isTLSSupported()) {
|
|
|
|
assert(CapturedStmtInfo->lookup(VD) &&
|
|
|
|
"Copyin threadprivates should have been captured!");
|
2018-12-21 22:10:18 +08:00
|
|
|
DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
|
|
|
|
(*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
|
2015-07-28 00:38:06 +08:00
|
|
|
MasterAddr = EmitLValue(&DRE).getAddress();
|
2015-09-10 16:12:02 +08:00
|
|
|
LocalDeclMap.erase(VD);
|
2015-07-28 00:38:06 +08:00
|
|
|
} else {
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
MasterAddr =
|
|
|
|
Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
|
|
|
|
: CGM.GetAddrOfGlobal(VD),
|
|
|
|
getContext().getDeclAlign(VD));
|
2015-07-28 00:38:06 +08:00
|
|
|
}
|
2015-04-16 13:39:01 +08:00
|
|
|
// Get the address of the threadprivate variable.
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address PrivateAddr = EmitLValue(*IRef).getAddress();
|
2015-04-16 13:39:01 +08:00
|
|
|
if (CopiedVars.size() == 1) {
|
|
|
|
// At first check if current thread is a master thread. If it is, no
|
|
|
|
// need to copy data.
|
|
|
|
CopyBegin = createBasicBlock("copyin.not.master");
|
|
|
|
CopyEnd = createBasicBlock("copyin.not.master.end");
|
|
|
|
Builder.CreateCondBr(
|
|
|
|
Builder.CreateICmpNE(
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
|
2018-04-14 01:31:06 +08:00
|
|
|
Builder.CreatePtrToInt(PrivateAddr.getPointer(),
|
|
|
|
CGM.IntPtrTy)),
|
2015-04-16 13:39:01 +08:00
|
|
|
CopyBegin, CopyEnd);
|
|
|
|
EmitBlock(CopyBegin);
|
|
|
|
}
|
2018-04-14 01:31:06 +08:00
|
|
|
const auto *SrcVD =
|
|
|
|
cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
|
|
|
|
const auto *DestVD =
|
|
|
|
cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
|
2015-04-16 13:39:01 +08:00
|
|
|
}
|
|
|
|
++IRef;
|
|
|
|
++ISrcRef;
|
|
|
|
++IDestRef;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (CopyEnd) {
|
|
|
|
// Exit out of copying procedure for non-master thread.
|
|
|
|
EmitBlock(CopyEnd, /*IsFinished=*/true);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2015-04-16 12:54:05 +08:00
|
|
|
bool CodeGenFunction::EmitOMPLastprivateClauseInit(
|
|
|
|
const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!HaveInsertPoint())
|
|
|
|
return false;
|
2015-04-16 12:54:05 +08:00
|
|
|
bool HasAtLeastOneLastprivate = false;
|
2016-04-22 11:56:56 +08:00
|
|
|
llvm::DenseSet<const VarDecl *> SIMDLCVs;
|
|
|
|
if (isOpenMPSimdDirective(D.getDirectiveKind())) {
|
2018-04-14 01:31:06 +08:00
|
|
|
const auto *LoopDirective = cast<OMPLoopDirective>(&D);
|
|
|
|
for (const Expr *C : LoopDirective->counters()) {
|
2016-04-22 11:56:56 +08:00
|
|
|
SIMDLCVs.insert(
|
|
|
|
cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
|
|
|
|
}
|
|
|
|
}
|
2015-04-16 12:54:05 +08:00
|
|
|
llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
|
2015-08-30 23:12:28 +08:00
|
|
|
for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
|
2015-05-13 18:23:02 +08:00
|
|
|
HasAtLeastOneLastprivate = true;
|
2017-12-30 02:07:07 +08:00
|
|
|
if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
|
|
|
|
!getLangOpts().OpenMPSimd)
|
2016-05-05 16:46:22 +08:00
|
|
|
break;
|
2015-04-16 12:54:05 +08:00
|
|
|
auto IRef = C->varlist_begin();
|
|
|
|
auto IDestRef = C->destination_exprs().begin();
|
2018-04-14 01:31:06 +08:00
|
|
|
for (const Expr *IInit : C->private_copies()) {
|
2015-04-16 12:54:05 +08:00
|
|
|
// Keep the address of the original variable for future update at the end
|
|
|
|
// of the loop.
|
2018-04-14 01:31:06 +08:00
|
|
|
const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
|
2016-05-05 16:46:22 +08:00
|
|
|
// Taskloops do not require additional initialization, it is done in
|
|
|
|
// runtime support library.
|
2015-04-16 12:54:05 +08:00
|
|
|
if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
|
2018-04-14 01:31:06 +08:00
|
|
|
const auto *DestVD =
|
|
|
|
cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
|
|
|
|
PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() {
|
2018-12-21 22:10:18 +08:00
|
|
|
DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
|
|
|
|
/*RefersToEnclosingVariableOrCapture=*/
|
|
|
|
CapturedStmtInfo->lookup(OrigVD) != nullptr,
|
|
|
|
(*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
|
2015-04-16 12:54:05 +08:00
|
|
|
return EmitLValue(&DRE).getAddress();
|
|
|
|
});
|
|
|
|
// Check if the variable is also a firstprivate: in this case IInit is
|
|
|
|
// not generated. Initialization of this variable will happen in codegen
|
|
|
|
// for 'firstprivate' clause.
|
2016-04-22 11:56:56 +08:00
|
|
|
if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
|
2018-04-14 01:31:06 +08:00
|
|
|
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
|
|
|
|
bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() {
|
2016-05-05 16:46:22 +08:00
|
|
|
// Emit private VarDecl with copy init.
|
|
|
|
EmitDecl(*VD);
|
|
|
|
return GetAddrOfLocalVar(VD);
|
|
|
|
});
|
2015-05-13 18:23:02 +08:00
|
|
|
assert(IsRegistered &&
|
|
|
|
"lastprivate var already registered as private");
|
|
|
|
(void)IsRegistered;
|
|
|
|
}
|
2015-04-16 12:54:05 +08:00
|
|
|
}
|
2016-02-19 06:34:54 +08:00
|
|
|
++IRef;
|
|
|
|
++IDestRef;
|
2015-04-16 12:54:05 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return HasAtLeastOneLastprivate;
|
|
|
|
}
|
|
|
|
|
|
|
|
void CodeGenFunction::EmitOMPLastprivateClauseFinal(
|
2016-04-22 11:56:56 +08:00
|
|
|
const OMPExecutableDirective &D, bool NoFinals,
|
|
|
|
llvm::Value *IsLastIterCond) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!HaveInsertPoint())
|
|
|
|
return;
|
2015-04-16 12:54:05 +08:00
|
|
|
// Emit following code:
|
|
|
|
// if (<IsLastIterCond>) {
|
|
|
|
// orig_var1 = private_orig_var1;
|
|
|
|
// ...
|
|
|
|
// orig_varn = private_orig_varn;
|
|
|
|
// }
|
2015-06-16 21:14:42 +08:00
|
|
|
llvm::BasicBlock *ThenBB = nullptr;
|
|
|
|
llvm::BasicBlock *DoneBB = nullptr;
|
|
|
|
if (IsLastIterCond) {
|
|
|
|
ThenBB = createBasicBlock(".omp.lastprivate.then");
|
|
|
|
DoneBB = createBasicBlock(".omp.lastprivate.done");
|
|
|
|
Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
|
|
|
|
EmitBlock(ThenBB);
|
|
|
|
}
|
2016-04-22 11:56:56 +08:00
|
|
|
llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
|
|
|
|
llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
|
2018-04-14 01:31:06 +08:00
|
|
|
if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
|
2016-02-18 21:48:15 +08:00
|
|
|
auto IC = LoopDirective->counters().begin();
|
2018-04-14 01:31:06 +08:00
|
|
|
for (const Expr *F : LoopDirective->finals()) {
|
|
|
|
const auto *D =
|
2016-04-22 11:56:56 +08:00
|
|
|
cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
|
|
|
|
if (NoFinals)
|
|
|
|
AlreadyEmittedVars.insert(D);
|
|
|
|
else
|
|
|
|
LoopCountersAndUpdates[D] = F;
|
2016-02-18 21:48:15 +08:00
|
|
|
++IC;
|
2015-05-21 15:59:51 +08:00
|
|
|
}
|
|
|
|
}
|
2016-02-18 21:48:15 +08:00
|
|
|
for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
|
|
|
|
auto IRef = C->varlist_begin();
|
|
|
|
auto ISrcRef = C->source_exprs().begin();
|
|
|
|
auto IDestRef = C->destination_exprs().begin();
|
2018-04-14 01:31:06 +08:00
|
|
|
for (const Expr *AssignOp : C->assignment_ops()) {
|
|
|
|
const auto *PrivateVD =
|
|
|
|
cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
|
2016-02-18 21:48:15 +08:00
|
|
|
QualType Type = PrivateVD->getType();
|
2018-04-14 01:31:06 +08:00
|
|
|
const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
|
2016-02-18 21:48:15 +08:00
|
|
|
if (AlreadyEmittedVars.insert(CanonicalVD).second) {
|
|
|
|
// If lastprivate variable is a loop control variable for loop-based
|
|
|
|
// directive, update its value before copyin back to original
|
|
|
|
// variable.
|
2018-04-14 01:31:06 +08:00
|
|
|
if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
|
2016-04-22 11:56:56 +08:00
|
|
|
EmitIgnoredExpr(FinalExpr);
|
2018-04-14 01:31:06 +08:00
|
|
|
const auto *SrcVD =
|
|
|
|
cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
|
|
|
|
const auto *DestVD =
|
|
|
|
cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
|
2016-02-18 21:48:15 +08:00
|
|
|
// Get the address of the original variable.
|
|
|
|
Address OriginalAddr = GetAddrOfLocalVar(DestVD);
|
|
|
|
// Get the address of the private variable.
|
|
|
|
Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
|
2018-04-14 01:31:06 +08:00
|
|
|
if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
|
2016-02-18 21:48:15 +08:00
|
|
|
PrivateAddr =
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address(Builder.CreateLoad(PrivateAddr),
|
|
|
|
getNaturalTypeAlignment(RefTy->getPointeeType()));
|
2016-02-18 21:48:15 +08:00
|
|
|
EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
|
2015-04-16 12:54:05 +08:00
|
|
|
}
|
2016-02-18 21:48:15 +08:00
|
|
|
++IRef;
|
|
|
|
++ISrcRef;
|
|
|
|
++IDestRef;
|
2015-04-16 12:54:05 +08:00
|
|
|
}
|
2018-04-14 01:31:06 +08:00
|
|
|
if (const Expr *PostUpdate = C->getPostUpdateExpr())
|
2016-02-25 13:25:57 +08:00
|
|
|
EmitIgnoredExpr(PostUpdate);
|
2015-04-16 12:54:05 +08:00
|
|
|
}
|
2016-02-18 21:48:15 +08:00
|
|
|
if (IsLastIterCond)
|
2015-06-16 21:14:42 +08:00
|
|
|
EmitBlock(DoneBB, /*IsFinished=*/true);
|
2015-04-16 12:54:05 +08:00
|
|
|
}
|
|
|
|
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit code for the 'reduction' clause. The following code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of private variable: they have private copies, but their initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
void CodeGenFunction::EmitOMPReductionClauseInit(
|
|
|
|
const OMPExecutableDirective &D,
|
|
|
|
CodeGenFunction::OMPPrivateScope &PrivateScope) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!HaveInsertPoint())
|
|
|
|
return;
|
2017-07-13 21:36:14 +08:00
|
|
|
SmallVector<const Expr *, 4> Shareds;
|
|
|
|
SmallVector<const Expr *, 4> Privates;
|
|
|
|
SmallVector<const Expr *, 4> ReductionOps;
|
|
|
|
SmallVector<const Expr *, 4> LHSs;
|
|
|
|
SmallVector<const Expr *, 4> RHSs;
|
2015-08-30 23:12:28 +08:00
|
|
|
for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
|
2015-10-08 17:10:53 +08:00
|
|
|
auto IPriv = C->privates().begin();
|
2016-03-17 18:19:46 +08:00
|
|
|
auto IRed = C->reduction_ops().begin();
|
2017-07-13 21:36:14 +08:00
|
|
|
auto ILHS = C->lhs_exprs().begin();
|
|
|
|
auto IRHS = C->rhs_exprs().begin();
|
2018-04-14 01:31:06 +08:00
|
|
|
for (const Expr *Ref : C->varlists()) {
|
2017-07-13 21:36:14 +08:00
|
|
|
Shareds.emplace_back(Ref);
|
|
|
|
Privates.emplace_back(*IPriv);
|
|
|
|
ReductionOps.emplace_back(*IRed);
|
|
|
|
LHSs.emplace_back(*ILHS);
|
|
|
|
RHSs.emplace_back(*IRHS);
|
|
|
|
std::advance(IPriv, 1);
|
|
|
|
std::advance(IRed, 1);
|
|
|
|
std::advance(ILHS, 1);
|
|
|
|
std::advance(IRHS, 1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
ReductionCodeGen RedCG(Shareds, Privates, ReductionOps);
|
|
|
|
unsigned Count = 0;
|
|
|
|
auto ILHS = LHSs.begin();
|
|
|
|
auto IRHS = RHSs.begin();
|
|
|
|
auto IPriv = Privates.begin();
|
2018-04-14 01:31:06 +08:00
|
|
|
for (const Expr *IRef : Shareds) {
|
|
|
|
const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
|
2017-07-13 21:36:14 +08:00
|
|
|
// Emit private VarDecl with reduction init.
|
|
|
|
RedCG.emitSharedLValue(*this, Count);
|
|
|
|
RedCG.emitAggregateType(*this, Count);
|
2018-04-14 01:31:06 +08:00
|
|
|
AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
|
2017-07-13 21:36:14 +08:00
|
|
|
RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
|
|
|
|
RedCG.getSharedLValue(Count),
|
|
|
|
[&Emission](CodeGenFunction &CGF) {
|
|
|
|
CGF.EmitAutoVarInit(Emission);
|
|
|
|
return true;
|
|
|
|
});
|
|
|
|
EmitAutoVarCleanups(Emission);
|
|
|
|
Address BaseAddr = RedCG.adjustPrivateAddress(
|
|
|
|
*this, Count, Emission.getAllocatedAddress());
|
|
|
|
bool IsRegistered = PrivateScope.addPrivate(
|
2018-04-14 01:31:06 +08:00
|
|
|
RedCG.getBaseDecl(Count), [BaseAddr]() { return BaseAddr; });
|
2017-07-13 21:36:14 +08:00
|
|
|
assert(IsRegistered && "private var already registered as private");
|
|
|
|
// Silence the warning about unused variable.
|
|
|
|
(void)IsRegistered;
|
|
|
|
|
2018-04-14 01:31:06 +08:00
|
|
|
const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
|
|
|
|
const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
|
2017-10-24 03:01:35 +08:00
|
|
|
QualType Type = PrivateVD->getType();
|
|
|
|
bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
|
|
|
|
if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
|
2017-07-13 21:36:14 +08:00
|
|
|
// Store the address of the original variable associated with the LHS
|
|
|
|
// implicit variable.
|
2018-04-14 01:31:06 +08:00
|
|
|
PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() {
|
2017-07-13 21:36:14 +08:00
|
|
|
return RedCG.getSharedLValue(Count).getAddress();
|
|
|
|
});
|
2018-04-14 01:31:06 +08:00
|
|
|
PrivateScope.addPrivate(
|
|
|
|
RHSVD, [this, PrivateVD]() { return GetAddrOfLocalVar(PrivateVD); });
|
2017-10-24 03:01:35 +08:00
|
|
|
} else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
|
|
|
|
isa<ArraySubscriptExpr>(IRef)) {
|
2017-07-13 21:36:14 +08:00
|
|
|
// Store the address of the original variable associated with the LHS
|
|
|
|
// implicit variable.
|
2018-04-14 01:31:06 +08:00
|
|
|
PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() {
|
2017-07-13 21:36:14 +08:00
|
|
|
return RedCG.getSharedLValue(Count).getAddress();
|
|
|
|
});
|
2018-04-14 01:31:06 +08:00
|
|
|
PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() {
|
2017-07-13 21:36:14 +08:00
|
|
|
return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
|
|
|
|
ConvertTypeForMem(RHSVD->getType()),
|
|
|
|
"rhs.begin");
|
|
|
|
});
|
|
|
|
} else {
|
|
|
|
QualType Type = PrivateVD->getType();
|
|
|
|
bool IsArray = getContext().getAsArrayType(Type) != nullptr;
|
|
|
|
Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
|
|
|
|
// Store the address of the original variable associated with the LHS
|
|
|
|
// implicit variable.
|
|
|
|
if (IsArray) {
|
|
|
|
OriginalAddr = Builder.CreateElementBitCast(
|
|
|
|
OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
|
2015-10-08 17:10:53 +08:00
|
|
|
}
|
2018-04-14 01:31:06 +08:00
|
|
|
PrivateScope.addPrivate(LHSVD, [OriginalAddr]() { return OriginalAddr; });
|
2017-07-13 21:36:14 +08:00
|
|
|
PrivateScope.addPrivate(
|
2018-04-14 01:31:06 +08:00
|
|
|
RHSVD, [this, PrivateVD, RHSVD, IsArray]() {
|
2017-07-13 21:36:14 +08:00
|
|
|
return IsArray
|
|
|
|
? Builder.CreateElementBitCast(
|
|
|
|
GetAddrOfLocalVar(PrivateVD),
|
|
|
|
ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
|
|
|
|
: GetAddrOfLocalVar(PrivateVD);
|
|
|
|
});
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
}
|
2017-07-13 21:36:14 +08:00
|
|
|
++ILHS;
|
|
|
|
++IRHS;
|
|
|
|
++IPriv;
|
|
|
|
++Count;
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void CodeGenFunction::EmitOMPReductionClauseFinal(
|
2017-02-17 00:20:16 +08:00
|
|
|
const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!HaveInsertPoint())
|
|
|
|
return;
|
2015-10-08 17:10:53 +08:00
|
|
|
llvm::SmallVector<const Expr *, 8> Privates;
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
llvm::SmallVector<const Expr *, 8> LHSExprs;
|
|
|
|
llvm::SmallVector<const Expr *, 8> RHSExprs;
|
|
|
|
llvm::SmallVector<const Expr *, 8> ReductionOps;
|
|
|
|
bool HasAtLeastOneReduction = false;
|
2015-08-30 23:12:28 +08:00
|
|
|
for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
HasAtLeastOneReduction = true;
|
2015-10-08 17:10:53 +08:00
|
|
|
Privates.append(C->privates().begin(), C->privates().end());
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
|
|
|
|
RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
|
|
|
|
ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
|
|
|
|
}
|
|
|
|
if (HasAtLeastOneReduction) {
|
2017-02-17 00:20:16 +08:00
|
|
|
bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
|
|
|
|
isOpenMPParallelDirective(D.getDirectiveKind()) ||
|
2017-12-30 02:07:07 +08:00
|
|
|
ReductionKind == OMPD_simd;
|
|
|
|
bool SimpleReduction = ReductionKind == OMPD_simd;
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
// Emit nowait reduction if nowait clause is present or directive is a
|
|
|
|
// parallel directive (it always has implicit barrier).
|
|
|
|
CGM.getOpenMPRuntime().emitReduction(
|
2018-08-10 05:09:38 +08:00
|
|
|
*this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
|
2017-02-17 00:20:16 +08:00
|
|
|
{WithNowait, SimpleReduction, ReductionKind});
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-03-02 12:57:40 +08:00
|
|
|
/// Emit the post-update expressions of all 'reduction' clauses of \p D.
///
/// As long as no guard block has been created, \p CondGen is asked for a
/// condition each time a post-update expression is about to be emitted. The
/// first non-null condition opens a conditional block; that expression and
/// all following post-update expressions are emitted inside it. When
/// \p CondGen only ever returns null, the expressions are emitted
/// unconditionally.
///
/// \param CGF Function the code is emitted into.
/// \param D Directive whose 'reduction' clauses are inspected.
/// \param CondGen Callback producing the guarding condition, or null.
static void emitPostUpdateForReductionClause(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::BasicBlock *DoneBB = nullptr;
  for (const auto *Clause : D.getClausesOfKind<OMPReductionClause>()) {
    const Expr *PostUpdate = Clause->getPostUpdateExpr();
    if (!PostUpdate)
      continue;

    // Ask for a condition only while the guard block has not been opened.
    llvm::Value *Cond = DoneBB ? nullptr : CondGen(CGF);
    if (Cond) {
      llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
      DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
      CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
      CGF.EmitBlock(ThenBB);
    }
    CGF.EmitIgnoredExpr(PostUpdate);
  }

  // Close the guard block if one was opened.
  if (DoneBB)
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
|
|
|
|
|
2017-04-26 01:52:12 +08:00
|
|
|
namespace {
|
|
|
|
/// Codegen lambda for appending distribute lower and upper bounds to outlined
|
|
|
|
/// parallel function. This is necessary for combined constructs such as
|
|
|
|
/// 'distribute parallel for'
|
|
|
|
typedef llvm::function_ref<void(CodeGenFunction &,
|
|
|
|
const OMPExecutableDirective &,
|
|
|
|
llvm::SmallVectorImpl<llvm::Value *> &)>
|
|
|
|
CodeGenBoundParametersTy;
|
|
|
|
} // anonymous namespace
|
|
|
|
|
|
|
|
static void emitCommonOMPParallelDirective(
|
|
|
|
CodeGenFunction &CGF, const OMPExecutableDirective &S,
|
|
|
|
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
|
|
|
|
const CodeGenBoundParametersTy &CodeGenBoundParameters) {
|
2017-01-19 02:18:53 +08:00
|
|
|
const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
|
2019-02-06 00:42:33 +08:00
|
|
|
llvm::Function *OutlinedFn =
|
2018-04-14 01:31:06 +08:00
|
|
|
CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
|
|
|
|
S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
|
2015-08-30 23:12:28 +08:00
|
|
|
if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
|
2014-10-13 16:23:51 +08:00
|
|
|
CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
|
2018-04-14 01:31:06 +08:00
|
|
|
llvm::Value *NumThreads =
|
|
|
|
CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
|
|
|
|
/*IgnoreResultAssign=*/true);
|
2015-02-25 16:32:46 +08:00
|
|
|
CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
|
2018-08-10 05:08:08 +08:00
|
|
|
CGF, NumThreads, NumThreadsClause->getBeginLoc());
|
2014-10-13 16:23:51 +08:00
|
|
|
}
|
2015-08-30 23:12:28 +08:00
|
|
|
if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
|
2016-03-29 13:34:15 +08:00
|
|
|
CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
|
2015-06-18 21:40:03 +08:00
|
|
|
CGF.CGM.getOpenMPRuntime().emitProcBindClause(
|
2018-08-10 05:08:08 +08:00
|
|
|
CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
|
2015-06-18 21:40:03 +08:00
|
|
|
}
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
const Expr *IfCond = nullptr;
|
2015-09-03 16:45:56 +08:00
|
|
|
for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
|
|
|
|
if (C->getNameModifier() == OMPD_unknown ||
|
|
|
|
C->getNameModifier() == OMPD_parallel) {
|
|
|
|
IfCond = C->getCondition();
|
|
|
|
break;
|
|
|
|
}
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
}
|
2016-03-29 13:34:15 +08:00
|
|
|
|
2017-01-19 04:40:48 +08:00
|
|
|
OMPParallelScope Scope(CGF, S);
|
2016-03-29 13:34:15 +08:00
|
|
|
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
|
2017-04-26 01:52:12 +08:00
|
|
|
// Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
|
|
|
|
// lower and upper bounds with the pragma 'for' chunking mechanism.
|
|
|
|
// The following lambda takes care of appending the lower and upper bound
|
|
|
|
// parameters when necessary
|
|
|
|
CodeGenBoundParameters(CGF, S, CapturedVars);
|
2016-03-29 13:34:15 +08:00
|
|
|
CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
|
2018-08-10 05:08:08 +08:00
|
|
|
CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
|
2015-09-10 16:12:02 +08:00
|
|
|
CapturedVars, IfCond);
|
2015-04-10 12:50:10 +08:00
|
|
|
}
|
|
|
|
|
2017-04-26 01:52:12 +08:00
|
|
|
/// Bound-parameter callback for directives that have no extra bounds to pass:
/// leaves the captured-variable list untouched.
static void
emitEmptyBoundParameters(CodeGenFunction & /*CGF*/,
                         const OMPExecutableDirective & /*S*/,
                         llvm::SmallVectorImpl<llvm::Value *> & /*Vars*/) {
  // Intentionally empty.
}
|
|
|
|
|
2015-04-10 12:50:10 +08:00
|
|
|
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
|
|
|
|
// Emit parallel region as a standalone region.
|
2018-03-16 02:10:54 +08:00
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
|
2018-03-20 22:45:59 +08:00
|
|
|
Action.Enter(CGF);
|
2015-04-10 12:50:10 +08:00
|
|
|
OMPPrivateScope PrivateScope(CGF);
|
2015-04-16 13:39:01 +08:00
|
|
|
bool Copyins = CGF.EmitOMPCopyinClause(S);
|
2016-02-15 16:07:17 +08:00
|
|
|
(void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
|
|
|
|
if (Copyins) {
|
2015-04-15 12:52:20 +08:00
|
|
|
// Emit implicit barrier to synchronize threads and avoid data races on
|
2016-02-15 16:07:17 +08:00
|
|
|
// propagation master's thread values of threadprivate variables to local
|
|
|
|
// instances of that variables of all other implicit threads.
|
2015-09-15 20:52:43 +08:00
|
|
|
CGF.CGM.getOpenMPRuntime().emitBarrierCall(
|
2018-08-10 05:08:08 +08:00
|
|
|
CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
|
2015-09-15 20:52:43 +08:00
|
|
|
/*ForceSimpleCall=*/true);
|
2015-04-15 12:52:20 +08:00
|
|
|
}
|
|
|
|
CGF.EmitOMPPrivateClause(S, PrivateScope);
|
|
|
|
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
|
|
|
|
(void)PrivateScope.Privatize();
|
2018-01-13 03:39:11 +08:00
|
|
|
CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
|
2017-02-17 00:20:16 +08:00
|
|
|
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
|
2015-04-10 12:50:10 +08:00
|
|
|
};
|
2017-04-26 01:52:12 +08:00
|
|
|
emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
|
|
|
|
emitEmptyBoundParameters);
|
2018-04-14 01:31:06 +08:00
|
|
|
emitPostUpdateForReductionClause(*this, S,
|
|
|
|
[](CodeGenFunction &) { return nullptr; });
|
2014-05-06 18:08:46 +08:00
|
|
|
}
|
2014-05-22 16:54:05 +08:00
|
|
|
|
2015-07-02 12:17:07 +08:00
|
|
|
void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
|
|
|
|
JumpDest LoopExit) {
|
2014-10-01 14:03:56 +08:00
|
|
|
RunCleanupsScope BodyScope(*this);
|
|
|
|
// Update counters values on current iteration.
|
2018-04-14 01:31:06 +08:00
|
|
|
for (const Expr *UE : D.updates())
|
|
|
|
EmitIgnoredExpr(UE);
|
2015-03-21 18:12:56 +08:00
|
|
|
// Update the linear variables.
|
2017-12-04 23:38:33 +08:00
|
|
|
// In distribute directives only loop counters may be marked as linear, no
|
|
|
|
// need to generate the code for them.
|
|
|
|
if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
|
|
|
|
for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
|
2018-04-14 01:31:06 +08:00
|
|
|
for (const Expr *UE : C->updates())
|
|
|
|
EmitIgnoredExpr(UE);
|
2017-12-04 23:38:33 +08:00
|
|
|
}
|
2015-03-21 18:12:56 +08:00
|
|
|
}
|
|
|
|
|
2014-10-01 14:03:56 +08:00
|
|
|
// On a continue in the body, jump to the end.
|
2018-04-14 01:31:06 +08:00
|
|
|
JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue");
|
2015-07-02 12:17:07 +08:00
|
|
|
BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
|
2019-08-15 03:30:06 +08:00
|
|
|
for (const Expr *E : D.finals_conditions()) {
|
|
|
|
if (!E)
|
|
|
|
continue;
|
|
|
|
// Check that loop counter in non-rectangular nest fits into the iteration
|
|
|
|
// space.
|
|
|
|
llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next");
|
|
|
|
EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(),
|
|
|
|
getProfileCount(D.getBody()));
|
|
|
|
EmitBlock(NextBB);
|
|
|
|
}
|
2014-10-01 14:03:56 +08:00
|
|
|
// Emit loop body.
|
2015-06-18 12:45:29 +08:00
|
|
|
EmitStmt(D.getBody());
|
2014-10-01 14:03:56 +08:00
|
|
|
// The end (updates/cleanups).
|
|
|
|
EmitBlock(Continue.getBlock());
|
|
|
|
BreakContinueStack.pop_back();
|
|
|
|
}
|
|
|
|
|
2015-04-10 12:50:10 +08:00
|
|
|
void CodeGenFunction::EmitOMPInnerLoop(
|
|
|
|
const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
|
|
|
|
const Expr *IncExpr,
|
2018-04-14 01:31:06 +08:00
|
|
|
const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
|
|
|
|
const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) {
|
2014-10-07 16:57:09 +08:00
|
|
|
auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
|
2014-10-01 14:03:56 +08:00
|
|
|
|
|
|
|
// Start the loop with a block that tests the condition.
|
2014-10-07 16:57:09 +08:00
|
|
|
auto CondBlock = createBasicBlock("omp.inner.for.cond");
|
2014-10-01 14:03:56 +08:00
|
|
|
EmitBlock(CondBlock);
|
2018-04-14 01:31:06 +08:00
|
|
|
const SourceRange R = S.getSourceRange();
|
2016-11-10 22:44:30 +08:00
|
|
|
LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
|
|
|
|
SourceLocToDebugLoc(R.getEnd()));
|
2014-10-01 14:03:56 +08:00
|
|
|
|
|
|
|
// If there are any cleanups between here and the loop-exit scope,
|
|
|
|
// create a block to stage a loop exit along.
|
2018-04-14 01:31:06 +08:00
|
|
|
llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
|
[OPENMP] Initial codegen for 'omp sections' and 'omp section' directives.
If only one section is found in the sections region, it is emitted just like single region.
Otherwise it is emitted as a static non-chunked loop.
#pragma omp sections
{
#pragma omp section
{1}
...
#pragma omp section
{n}
}
is translated to something like
i32 <iter_var>
i32 <last_iter> = 0
i32 <lower_bound> = 0
i32 <upper_bound> = n-1
i32 <stride> = 1
call void @__kmpc_for_static_init_4(<loc>, i32 <gtid>, i32 34/*static non-chunked*/, i32* <last_iter>, i32* <lower_bound>, i32* <upper_bound>, i32* <stride>, i32 1/*increment always 1*/, i32 1/*chunk always 1*/)
<upper_bound> = min(<upper_bound>, n-1)
<iter_var> = <lb>
check:
br <iter_var> <= <upper_bound>, label cont, label exit
continue:
switch (IV) {
case 0:
{1};
break;
...
case <NumSection> - 1:
{n};
break;
}
++<iter_var>
br label check
exit:
call void @__kmpc_for_static_fini(<loc>, i32 <gtid>)
Differential Revision: http://reviews.llvm.org/D8244
llvm-svn: 232021
2015-03-12 16:53:29 +08:00
|
|
|
if (RequiresCleanup)
|
2014-10-07 16:57:09 +08:00
|
|
|
ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
|
2014-10-01 14:03:56 +08:00
|
|
|
|
2018-04-14 01:31:06 +08:00
|
|
|
llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body");
|
2014-10-01 14:03:56 +08:00
|
|
|
|
[OPENMP] Initial codegen for 'omp sections' and 'omp section' directives.
If only one section is found in the sections region, it is emitted just like single region.
Otherwise it is emitted as a static non-chunked loop.
#pragma omp sections
{
#pragma omp section
{1}
...
#pragma omp section
{n}
}
is translated to something like
i32 <iter_var>
i32 <last_iter> = 0
i32 <lower_bound> = 0
i32 <upper_bound> = n-1
i32 <stride> = 1
call void @__kmpc_for_static_init_4(<loc>, i32 <gtid>, i32 34/*static non-chunked*/, i32* <last_iter>, i32* <lower_bound>, i32* <upper_bound>, i32* <stride>, i32 1/*increment always 1*/, i32 1/*chunk always 1*/)
<upper_bound> = min(<upper_bound>, n-1)
<iter_var> = <lb>
check:
br <iter_var> <= <upper_bound>, label cont, label exit
continue:
switch (IV) {
case 0:
{1};
break;
...
case <NumSection> - 1:
{n};
break;
}
++<iter_var>
br label check
exit:
call void @__kmpc_for_static_fini(<loc>, i32 <gtid>)
Differential Revision: http://reviews.llvm.org/D8244
llvm-svn: 232021
2015-03-12 16:53:29 +08:00
|
|
|
// Emit condition.
|
2015-04-24 07:06:47 +08:00
|
|
|
EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
|
2014-10-01 14:03:56 +08:00
|
|
|
if (ExitBlock != LoopExit.getBlock()) {
|
|
|
|
EmitBlock(ExitBlock);
|
|
|
|
EmitBranchThroughCleanup(LoopExit);
|
|
|
|
}
|
|
|
|
|
|
|
|
EmitBlock(LoopBody);
|
2015-04-24 07:06:47 +08:00
|
|
|
incrementProfileCounter(&S);
|
2014-10-01 14:03:56 +08:00
|
|
|
|
|
|
|
// Create a block for the increment.
|
2018-04-14 01:31:06 +08:00
|
|
|
JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
|
2014-10-01 14:03:56 +08:00
|
|
|
BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
|
|
|
|
|
2015-04-10 12:50:10 +08:00
|
|
|
BodyGen(*this);
|
2014-10-01 14:03:56 +08:00
|
|
|
|
|
|
|
// Emit "IV = IV + 1" and a back-edge to the condition block.
|
|
|
|
EmitBlock(Continue.getBlock());
|
[OPENMP] Initial codegen for 'omp sections' and 'omp section' directives.
If only one section is found in the sections region, it is emitted just like single region.
Otherwise it is emitted as a static non-chunked loop.
#pragma omp sections
{
#pragma omp section
{1}
...
#pragma omp section
{n}
}
is translated to something like
i32 <iter_var>
i32 <last_iter> = 0
i32 <lower_bound> = 0
i32 <upper_bound> = n-1
i32 <stride> = 1
call void @__kmpc_for_static_init_4(<loc>, i32 <gtid>, i32 34/*static non-chunked*/, i32* <last_iter>, i32* <lower_bound>, i32* <upper_bound>, i32* <stride>, i32 1/*increment always 1*/, i32 1/*chunk always 1*/)
<upper_bound> = min(<upper_bound>, n-1)
<iter_var> = <lb>
check:
br <iter_var> <= <upper_bound>, label cont, label exit
continue:
switch (IV) {
case 0:
{1};
break;
...
case <NumSection> - 1:
{n};
break;
}
++<iter_var>
br label check
exit:
call void @__kmpc_for_static_fini(<loc>, i32 <gtid>)
Differential Revision: http://reviews.llvm.org/D8244
llvm-svn: 232021
2015-03-12 16:53:29 +08:00
|
|
|
EmitIgnoredExpr(IncExpr);
|
2015-04-22 19:15:40 +08:00
|
|
|
PostIncGen(*this);
|
2014-10-01 14:03:56 +08:00
|
|
|
BreakContinueStack.pop_back();
|
|
|
|
EmitBranch(CondBlock);
|
|
|
|
LoopStack.pop();
|
|
|
|
// Emit the fall-through block.
|
|
|
|
EmitBlock(LoopExit.getBlock());
|
|
|
|
}
|
|
|
|
|
2017-08-16 23:58:46 +08:00
|
|
|
bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!HaveInsertPoint())
|
2017-08-16 23:58:46 +08:00
|
|
|
return false;
|
2015-06-17 15:45:51 +08:00
|
|
|
// Emit inits for the linear variables.
|
2017-08-16 23:58:46 +08:00
|
|
|
bool HasLinears = false;
|
2015-08-30 23:12:28 +08:00
|
|
|
for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
|
2018-04-14 01:31:06 +08:00
|
|
|
for (const Expr *Init : C->inits()) {
|
2017-08-16 23:58:46 +08:00
|
|
|
HasLinears = true;
|
2018-04-14 01:31:06 +08:00
|
|
|
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
|
|
|
|
if (const auto *Ref =
|
|
|
|
dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
|
2016-03-09 17:49:09 +08:00
|
|
|
AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
|
2018-04-14 01:31:06 +08:00
|
|
|
const auto *OrigVD = cast<VarDecl>(Ref->getDecl());
|
2018-12-21 22:10:18 +08:00
|
|
|
DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
|
2016-03-09 17:49:09 +08:00
|
|
|
CapturedStmtInfo->lookup(OrigVD) != nullptr,
|
|
|
|
VD->getInit()->getType(), VK_LValue,
|
|
|
|
VD->getInit()->getExprLoc());
|
|
|
|
EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
|
|
|
|
VD->getType()),
|
|
|
|
/*capturedByInit=*/false);
|
|
|
|
EmitAutoVarCleanups(Emission);
|
2018-04-14 01:31:06 +08:00
|
|
|
} else {
|
2016-03-09 17:49:09 +08:00
|
|
|
EmitVarDecl(*VD);
|
2018-04-14 01:31:06 +08:00
|
|
|
}
|
2014-10-01 14:03:56 +08:00
|
|
|
}
|
2015-06-17 15:45:51 +08:00
|
|
|
// Emit the linear steps for the linear clauses.
|
|
|
|
// If a step is not constant, it is pre-calculated before the loop.
|
2018-04-14 01:31:06 +08:00
|
|
|
if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
|
|
|
|
if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
|
2015-06-18 18:10:12 +08:00
|
|
|
EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
|
2015-06-17 15:45:51 +08:00
|
|
|
// Emit calculation of the linear step.
|
2015-06-18 18:10:12 +08:00
|
|
|
EmitIgnoredExpr(CS);
|
2015-06-17 15:45:51 +08:00
|
|
|
}
|
2014-10-01 14:03:56 +08:00
|
|
|
}
|
2017-08-16 23:58:46 +08:00
|
|
|
return HasLinears;
|
2015-06-17 15:45:51 +08:00
|
|
|
}
|
|
|
|
|
2016-04-22 11:56:56 +08:00
|
|
|
void CodeGenFunction::EmitOMPLinearClauseFinal(
|
|
|
|
const OMPLoopDirective &D,
|
2018-04-14 01:31:06 +08:00
|
|
|
const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
|
2016-04-22 11:56:56 +08:00
|
|
|
if (!HaveInsertPoint())
|
2015-12-18 15:58:25 +08:00
|
|
|
return;
|
2016-03-09 17:49:09 +08:00
|
|
|
llvm::BasicBlock *DoneBB = nullptr;
|
2015-03-21 18:12:56 +08:00
|
|
|
// Emit the final values of the linear variables.
|
2015-08-30 23:12:28 +08:00
|
|
|
for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
|
2015-05-08 18:41:21 +08:00
|
|
|
auto IC = C->varlist_begin();
|
2018-04-14 01:31:06 +08:00
|
|
|
for (const Expr *F : C->finals()) {
|
2016-03-09 17:49:09 +08:00
|
|
|
if (!DoneBB) {
|
2018-04-14 01:31:06 +08:00
|
|
|
if (llvm::Value *Cond = CondGen(*this)) {
|
2016-03-09 17:49:09 +08:00
|
|
|
// If the first post-update expression is found, emit conditional
|
|
|
|
// block if it was requested.
|
2018-04-14 01:31:06 +08:00
|
|
|
llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu");
|
2016-04-22 11:56:56 +08:00
|
|
|
DoneBB = createBasicBlock(".omp.linear.pu.done");
|
|
|
|
Builder.CreateCondBr(Cond, ThenBB, DoneBB);
|
|
|
|
EmitBlock(ThenBB);
|
2016-03-09 17:49:09 +08:00
|
|
|
}
|
|
|
|
}
|
2018-04-14 01:31:06 +08:00
|
|
|
const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
|
2018-12-21 22:10:18 +08:00
|
|
|
DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
|
2016-04-22 11:56:56 +08:00
|
|
|
CapturedStmtInfo->lookup(OrigVD) != nullptr,
|
2015-05-08 18:41:21 +08:00
|
|
|
(*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
|
2016-04-22 11:56:56 +08:00
|
|
|
Address OrigAddr = EmitLValue(&DRE).getAddress();
|
|
|
|
CodeGenFunction::OMPPrivateScope VarScope(*this);
|
2018-04-14 01:31:06 +08:00
|
|
|
VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
|
2015-05-08 18:41:21 +08:00
|
|
|
(void)VarScope.Privatize();
|
2016-04-22 11:56:56 +08:00
|
|
|
EmitIgnoredExpr(F);
|
2015-05-08 18:41:21 +08:00
|
|
|
++IC;
|
2015-03-21 18:12:56 +08:00
|
|
|
}
|
2018-04-14 01:31:06 +08:00
|
|
|
if (const Expr *PostUpdate = C->getPostUpdateExpr())
|
2016-04-22 11:56:56 +08:00
|
|
|
EmitIgnoredExpr(PostUpdate);
|
2015-03-21 18:12:56 +08:00
|
|
|
}
|
2016-03-09 17:49:09 +08:00
|
|
|
if (DoneBB)
|
2016-04-22 11:56:56 +08:00
|
|
|
EmitBlock(DoneBB, /*IsFinished=*/true);
|
2014-10-01 14:03:56 +08:00
|
|
|
}
|
|
|
|
|
2015-06-17 15:45:51 +08:00
|
|
|
/// Emit an alignment assumption for every pointer listed in the 'aligned'
/// clauses of \p D. A clause without an explicit alignment falls back to the
/// default SIMD alignment reported by the AST context for the pointee type.
static void emitAlignedClause(CodeGenFunction &CGF,
                              const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;

  for (const auto *Aligned : D.getClausesOfKind<OMPAlignedClause>()) {
    // Alignment explicitly requested by the clause; 0 means "not specified".
    unsigned ExplicitAlign = 0;
    if (const Expr *AlignExpr = Aligned->getAlignment()) {
      auto *AlignCI = cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignExpr));
      ExplicitAlign = static_cast<unsigned>(AlignCI->getZExtValue());
    }

    for (const Expr *PtrExpr : Aligned->varlists()) {
      unsigned EffectiveAlign = ExplicitAlign;
      if (EffectiveAlign == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined default
        // alignments for SIMD instructions on the target platforms are assumed.
        ASTContext &Ctx = CGF.getContext();
        EffectiveAlign =
            Ctx.toCharUnitsFromBits(Ctx.getOpenMPDefaultSimdAlign(
                                        PtrExpr->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((EffectiveAlign == 0 || llvm::isPowerOf2_32(EffectiveAlign)) &&
             "alignment is not power of 2");
      // Nothing to assume when even the default alignment is zero.
      if (EffectiveAlign == 0)
        continue;

      llvm::Value *PtrValue = CGF.EmitScalarExpr(PtrExpr);
      CGF.EmitAlignmentAssumption(PtrValue, PtrExpr,
                                  /*No second loc needed*/ SourceLocation(),
                                  EffectiveAlign);
    }
  }
}
|
|
|
|
|
2016-04-22 11:56:56 +08:00
|
|
|
void CodeGenFunction::EmitOMPPrivateLoopCounters(
|
|
|
|
const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
|
|
|
|
if (!HaveInsertPoint())
|
2015-12-18 15:58:25 +08:00
|
|
|
return;
|
2016-04-22 11:56:56 +08:00
|
|
|
auto I = S.private_counters().begin();
|
2018-04-14 01:31:06 +08:00
|
|
|
for (const Expr *E : S.counters()) {
|
|
|
|
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
|
|
|
|
const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
|
2018-03-08 02:17:06 +08:00
|
|
|
// Emit var without initialization.
|
2018-04-14 01:31:06 +08:00
|
|
|
AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD);
|
2018-03-08 02:17:06 +08:00
|
|
|
EmitAutoVarCleanups(VarEmission);
|
|
|
|
LocalDeclMap.erase(PrivateVD);
|
|
|
|
(void)LoopScope.addPrivate(VD, [&VarEmission]() {
|
|
|
|
return VarEmission.getAllocatedAddress();
|
2014-10-10 17:48:26 +08:00
|
|
|
});
|
2016-04-22 11:56:56 +08:00
|
|
|
if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
|
|
|
|
VD->hasGlobalStorage()) {
|
2018-03-08 02:17:06 +08:00
|
|
|
(void)LoopScope.addPrivate(PrivateVD, [this, VD, E]() {
|
2018-12-21 22:10:18 +08:00
|
|
|
DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD),
|
2016-04-22 11:56:56 +08:00
|
|
|
LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
|
|
|
|
E->getType(), VK_LValue, E->getExprLoc());
|
|
|
|
return EmitLValue(&DRE).getAddress();
|
|
|
|
});
|
2018-03-08 02:17:06 +08:00
|
|
|
} else {
|
|
|
|
(void)LoopScope.addPrivate(PrivateVD, [&VarEmission]() {
|
|
|
|
return VarEmission.getAllocatedAddress();
|
|
|
|
});
|
2016-04-22 11:56:56 +08:00
|
|
|
}
|
2015-08-06 20:30:57 +08:00
|
|
|
++I;
|
2014-10-10 17:48:26 +08:00
|
|
|
}
|
2018-08-14 03:04:24 +08:00
|
|
|
// Privatize extra loop counters used in loops for ordered(n) clauses.
|
|
|
|
for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
|
|
|
|
if (!C->getNumForLoops())
|
|
|
|
continue;
|
|
|
|
for (unsigned I = S.getCollapsedNumber(),
|
|
|
|
E = C->getLoopNumIterations().size();
|
|
|
|
I < E; ++I) {
|
2018-09-21 01:19:41 +08:00
|
|
|
const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I));
|
2018-08-14 03:04:24 +08:00
|
|
|
const auto *VD = cast<VarDecl>(DRE->getDecl());
|
2019-03-15 04:36:00 +08:00
|
|
|
// Override only those variables that can be captured to avoid re-emission
|
|
|
|
// of the variables declared within the loops.
|
|
|
|
if (DRE->refersToEnclosingVariableOrCapture()) {
|
2018-08-14 03:04:24 +08:00
|
|
|
(void)LoopScope.addPrivate(VD, [this, DRE, VD]() {
|
|
|
|
return CreateMemTemp(DRE->getType(), VD->getName());
|
|
|
|
});
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2014-10-10 17:48:26 +08:00
|
|
|
}
|
|
|
|
|
2015-04-22 19:59:37 +08:00
|
|
|
/// Emit the pre-condition check of loop directive \p S: evaluate the counter
/// initializers, then branch on \p Cond to \p TrueBlock or \p FalseBlock.
/// \p TrueCount is forwarded unchanged to EmitBranchOnBoolExpr.
static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
                        const Expr *Cond, llvm::BasicBlock *TrueBlock,
                        llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
  if (!CGF.HaveInsertPoint())
    return;

  {
    // The privatized counters are only needed while their initial values are
    // computed, hence the nested scope.
    CodeGenFunction::OMPPrivateScope CounterScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, CounterScope);
    (void)CounterScope.Privatize();
    // Get initial values of real counters.
    for (const Expr *CounterInit : S.inits())
      CGF.EmitIgnoredExpr(CounterInit);
  }

  // Create temp loop control variables with their init values to support
  // non-rectangular loops.
  CodeGenFunction::OMPMapVars DependentVars;
  for (const Expr *DepCounter : S.dependent_counters()) {
    if (!DepCounter)
      continue;
    assert(!DepCounter->getType().getNonReferenceType()->isRecordType() &&
           "dependent counter must not be an iterator.");
    const auto *DepVD = cast<VarDecl>(cast<DeclRefExpr>(DepCounter)->getDecl());
    Address TempAddr =
        CGF.CreateMemTemp(DepVD->getType().getNonReferenceType());
    (void)DependentVars.setVarAddr(CGF, DepVD, TempAddr);
  }
  (void)DependentVars.apply(CGF);
  for (const Expr *DepInit : S.dependent_inits()) {
    if (DepInit)
      CGF.EmitIgnoredExpr(DepInit);
  }

  // Check that loop is executed at least one time.
  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
  // Undo the temporary mapping applied above.
  DependentVars.restore(CGF);
}
|
|
|
|
|
2016-04-22 11:56:56 +08:00
|
|
|
void CodeGenFunction::EmitOMPLinearClause(
|
|
|
|
const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
|
|
|
|
if (!HaveInsertPoint())
|
2015-12-18 15:58:25 +08:00
|
|
|
return;
|
2016-04-22 11:56:56 +08:00
|
|
|
llvm::DenseSet<const VarDecl *> SIMDLCVs;
|
|
|
|
if (isOpenMPSimdDirective(D.getDirectiveKind())) {
|
2018-04-14 01:31:06 +08:00
|
|
|
const auto *LoopDirective = cast<OMPLoopDirective>(&D);
|
|
|
|
for (const Expr *C : LoopDirective->counters()) {
|
2016-04-22 11:56:56 +08:00
|
|
|
SIMDLCVs.insert(
|
|
|
|
cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
|
|
|
|
}
|
|
|
|
}
|
2015-08-30 23:12:28 +08:00
|
|
|
for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
|
2015-08-18 14:47:21 +08:00
|
|
|
auto CurPrivate = C->privates().begin();
|
2018-04-14 01:31:06 +08:00
|
|
|
for (const Expr *E : C->varlists()) {
|
|
|
|
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
|
|
|
|
const auto *PrivateVD =
|
2015-08-18 14:47:21 +08:00
|
|
|
cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
|
2016-04-22 11:56:56 +08:00
|
|
|
if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
|
2018-04-14 01:31:06 +08:00
|
|
|
bool IsRegistered = PrivateScope.addPrivate(VD, [this, PrivateVD]() {
|
2016-04-22 11:56:56 +08:00
|
|
|
// Emit private VarDecl with copy init.
|
|
|
|
EmitVarDecl(*PrivateVD);
|
|
|
|
return GetAddrOfLocalVar(PrivateVD);
|
|
|
|
});
|
|
|
|
assert(IsRegistered && "linear var already registered as private");
|
|
|
|
// Silence the warning about unused variable.
|
|
|
|
(void)IsRegistered;
|
2018-04-14 01:31:06 +08:00
|
|
|
} else {
|
2016-04-22 11:56:56 +08:00
|
|
|
EmitVarDecl(*PrivateVD);
|
2018-04-14 01:31:06 +08:00
|
|
|
}
|
2015-08-18 14:47:21 +08:00
|
|
|
++CurPrivate;
|
2015-03-21 18:12:56 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-08-21 20:19:04 +08:00
|
|
|
static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
|
2015-12-31 14:52:34 +08:00
|
|
|
const OMPExecutableDirective &D,
|
|
|
|
bool IsMonotonic) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!CGF.HaveInsertPoint())
|
|
|
|
return;
|
2015-08-30 23:12:28 +08:00
|
|
|
if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
|
2015-08-21 20:19:04 +08:00
|
|
|
RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
|
|
|
|
/*ignoreResult=*/true);
|
2018-04-14 01:31:06 +08:00
|
|
|
auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
|
2015-08-21 20:19:04 +08:00
|
|
|
CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
|
|
|
|
// In presence of finite 'safelen', it may be unsafe to mark all
|
|
|
|
// the memory instructions parallel, because loop-carried
|
|
|
|
// dependences of 'safelen' iterations are possible.
|
2015-12-31 14:52:34 +08:00
|
|
|
if (!IsMonotonic)
|
|
|
|
CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
|
2015-08-30 23:12:28 +08:00
|
|
|
} else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
|
2015-06-17 15:45:51 +08:00
|
|
|
RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
|
|
|
|
/*ignoreResult=*/true);
|
2018-04-14 01:31:06 +08:00
|
|
|
auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
|
2015-07-15 07:03:09 +08:00
|
|
|
CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
|
2015-06-17 15:45:51 +08:00
|
|
|
// In presence of finite 'safelen', it may be unsafe to mark all
|
|
|
|
// the memory instructions parallel, because loop-carried
|
|
|
|
// dependences of 'safelen' iterations are possible.
|
2018-04-14 01:31:06 +08:00
|
|
|
CGF.LoopStack.setParallel(/*Enable=*/false);
|
2015-06-17 15:45:51 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-12-31 14:52:34 +08:00
|
|
|
void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
|
|
|
|
bool IsMonotonic) {
|
2015-06-18 12:45:29 +08:00
|
|
|
// Walk clauses and process safelen/lastprivate.
|
2015-12-31 14:52:34 +08:00
|
|
|
LoopStack.setParallel(!IsMonotonic);
|
2018-04-14 01:31:06 +08:00
|
|
|
LoopStack.setVectorizeEnable();
|
2015-12-31 14:52:34 +08:00
|
|
|
emitSimdlenSafelenClause(*this, D, IsMonotonic);
|
2015-06-18 12:45:29 +08:00
|
|
|
}
|
|
|
|
|
2016-03-09 17:49:09 +08:00
|
|
|
void CodeGenFunction::EmitOMPSimdFinal(
|
|
|
|
const OMPLoopDirective &D,
|
2018-04-14 01:31:06 +08:00
|
|
|
const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!HaveInsertPoint())
|
|
|
|
return;
|
2016-03-09 17:49:09 +08:00
|
|
|
llvm::BasicBlock *DoneBB = nullptr;
|
2015-06-18 12:45:29 +08:00
|
|
|
auto IC = D.counters().begin();
|
2016-04-22 11:56:56 +08:00
|
|
|
auto IPC = D.private_counters().begin();
|
2018-04-14 01:31:06 +08:00
|
|
|
for (const Expr *F : D.finals()) {
|
|
|
|
const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
|
|
|
|
const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
|
|
|
|
const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
|
2016-04-22 11:56:56 +08:00
|
|
|
if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
|
|
|
|
OrigVD->hasGlobalStorage() || CED) {
|
2016-03-09 17:49:09 +08:00
|
|
|
if (!DoneBB) {
|
2018-04-14 01:31:06 +08:00
|
|
|
if (llvm::Value *Cond = CondGen(*this)) {
|
2016-03-09 17:49:09 +08:00
|
|
|
// If the first post-update expression is found, emit conditional
|
|
|
|
// block if it was requested.
|
2018-04-14 01:31:06 +08:00
|
|
|
llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then");
|
2016-03-09 17:49:09 +08:00
|
|
|
DoneBB = createBasicBlock(".omp.final.done");
|
|
|
|
Builder.CreateCondBr(Cond, ThenBB, DoneBB);
|
|
|
|
EmitBlock(ThenBB);
|
|
|
|
}
|
|
|
|
}
|
2016-04-22 11:56:56 +08:00
|
|
|
Address OrigAddr = Address::invalid();
|
2018-03-08 02:17:06 +08:00
|
|
|
if (CED) {
|
2016-04-22 11:56:56 +08:00
|
|
|
OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
|
2018-03-08 02:17:06 +08:00
|
|
|
} else {
|
2018-12-21 22:10:18 +08:00
|
|
|
DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD),
|
2016-04-22 11:56:56 +08:00
|
|
|
/*RefersToEnclosingVariableOrCapture=*/false,
|
|
|
|
(*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
|
|
|
|
OrigAddr = EmitLValue(&DRE).getAddress();
|
|
|
|
}
|
2015-06-18 12:45:29 +08:00
|
|
|
OMPPrivateScope VarScope(*this);
|
2018-04-14 01:31:06 +08:00
|
|
|
VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
|
2015-06-18 12:45:29 +08:00
|
|
|
(void)VarScope.Privatize();
|
|
|
|
EmitIgnoredExpr(F);
|
|
|
|
}
|
|
|
|
++IC;
|
2016-04-22 11:56:56 +08:00
|
|
|
++IPC;
|
2015-06-18 12:45:29 +08:00
|
|
|
}
|
2016-03-09 17:49:09 +08:00
|
|
|
if (DoneBB)
|
|
|
|
EmitBlock(DoneBB, /*IsFinished=*/true);
|
2015-06-18 12:45:29 +08:00
|
|
|
}
|
|
|
|
|
2017-04-26 01:52:12 +08:00
|
|
|
/// Emit the body of loop directive \p S with \p LoopExit as its exit
/// destination, then emit a stop point for \p S.
static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
                                         const OMPLoopDirective &S,
                                         CodeGenFunction::JumpDest LoopExit) {
  const OMPLoopDirective *Directive = &S;
  CGF.EmitOMPLoopBody(*Directive, LoopExit);
  CGF.EmitStopPoint(Directive);
}
|
2017-04-26 01:52:12 +08:00
|
|
|
|
2017-12-30 02:07:07 +08:00
|
|
|
/// Emit a helper variable and return corresponding lvalue.
|
|
|
|
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
|
|
|
|
const DeclRefExpr *Helper) {
|
|
|
|
auto VDecl = cast<VarDecl>(Helper->getDecl());
|
|
|
|
CGF.EmitVarDecl(*VDecl);
|
|
|
|
return CGF.EmitLValue(Helper);
|
|
|
|
}
|
|
|
|
|
2017-11-18 01:57:25 +08:00
|
|
|
/// Emit the code of a simd region for loop directive \p S. The emitted code
/// has the following shape:
/// \code
///   if (PreCond) {
///     for (IV in 0..LastIteration) BODY;
///     <Final counter/linear vars updates>;
///   }
/// \endcode
/// \p Action is entered before anything else is emitted.
static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
                              PrePostActionTy &Action) {
  Action.Enter(CGF);
  const OpenMPDirectiveKind Kind = S.getDirectiveKind();
  assert(isOpenMPSimdDirective(Kind) && "Expected simd directive");
  OMPLoopScope PreInitScope(CGF, S);

  // Distribute, worksharing and taskloop directives additionally need their
  // lower/upper bound helper variables.
  const bool NeedsBoundVars = isOpenMPDistributeDirective(Kind) ||
                              isOpenMPWorksharingDirective(Kind) ||
                              isOpenMPTaskLoopDirective(Kind);
  if (NeedsBoundVars) {
    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()));
    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()));
  }

  // Emit: if (PreCond) - begin.
  // If the condition constant folds and can be elided, avoid emitting the
  // whole loop.
  bool PreCondValue;
  llvm::BasicBlock *AfterLoopBB = nullptr;
  const bool PreCondFolds =
      CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), PreCondValue);
  if (PreCondFolds && !PreCondValue)
    return;
  if (!PreCondFolds) {
    llvm::BasicBlock *LoopEntryBB = CGF.createBasicBlock("simd.if.then");
    AfterLoopBB = CGF.createBasicBlock("simd.if.end");
    emitPreCond(CGF, S, S.getPreCond(), LoopEntryBB, AfterLoopBB,
                CGF.getProfileCount(&S));
    CGF.EmitBlock(LoopEntryBB);
    CGF.incrementProfileCounter(&S);
  }

  // Emit the loop iteration variable.
  const auto *IVRef = cast<DeclRefExpr>(S.getIterationVariable());
  const auto *IVDecl = cast<VarDecl>(IVRef->getDecl());
  CGF.EmitVarDecl(*IVDecl);
  CGF.EmitIgnoredExpr(S.getInit());

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on
  // each iteration (e.g., it is foldable into a constant).
  if (const auto *LastIterRef = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    CGF.EmitVarDecl(*cast<VarDecl>(LastIterRef->getDecl()));
    // Emit calculation of the iterations count.
    CGF.EmitIgnoredExpr(S.getCalcLastIteration());
  }

  CGF.EmitOMPSimdInit(S);

  emitAlignedClause(CGF, S);
  (void)CGF.EmitOMPLinearClauseInit(S);

  // The final-update helpers are always called with a generator that yields
  // no condition, so they never open a guarded block here.
  auto &&NoCondition = [](CodeGenFunction &) { return nullptr; };
  {
    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    CGF.EmitOMPLinearClause(S, LoopScope);
    CGF.EmitOMPPrivateClause(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    const bool HasLastprivate = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

    auto &&EmitBody = [&S](CodeGenFunction &CGF) {
      CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest());
      CGF.EmitStopPoint(&S);
    };
    auto &&EmitNothing = [](CodeGenFunction &) {};
    CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(), EmitBody, EmitNothing);

    CGF.EmitOMPSimdFinal(S, NoCondition);
    // Emit final copy of the lastprivate variables at the end of loops.
    if (HasLastprivate)
      CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
    emitPostUpdateForReductionClause(CGF, S, NoCondition);
  }
  CGF.EmitOMPLinearClauseFinal(S, NoCondition);

  // Emit: if (PreCond) - end.
  if (AfterLoopBB) {
    CGF.EmitBranch(AfterLoopBB);
    CGF.EmitBlock(AfterLoopBB, true);
  }
}
|
|
|
|
|
|
|
|
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
|
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
|
|
|
|
emitOMPSimdRegion(CGF, S, Action);
|
2015-04-10 12:50:10 +08:00
|
|
|
};
|
2018-01-13 03:39:11 +08:00
|
|
|
OMPLexicalScope Scope(*this, S, OMPD_unknown);
|
2015-07-03 17:56:58 +08:00
|
|
|
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
|
2014-05-22 16:54:05 +08:00
|
|
|
}
|
|
|
|
|
2017-04-26 01:52:12 +08:00
|
|
|
void CodeGenFunction::EmitOMPOuterLoop(
|
|
|
|
bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
|
|
|
|
CodeGenFunction::OMPPrivateScope &LoopScope,
|
|
|
|
const CodeGenFunction::OMPLoopArguments &LoopArgs,
|
|
|
|
const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
|
|
|
|
const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
|
2018-04-14 01:31:06 +08:00
|
|
|
CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
|
2015-03-12 21:37:50 +08:00
|
|
|
|
2015-01-22 16:49:35 +08:00
|
|
|
const Expr *IVExpr = S.getIterationVariable();
|
|
|
|
const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
|
|
|
|
const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
|
|
|
|
|
2018-04-14 01:31:06 +08:00
|
|
|
JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
|
2015-01-22 16:49:35 +08:00
|
|
|
|
|
|
|
// Start the loop with a block that tests the condition.
|
2018-04-14 01:31:06 +08:00
|
|
|
llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond");
|
2015-01-22 16:49:35 +08:00
|
|
|
EmitBlock(CondBlock);
|
2018-04-14 01:31:06 +08:00
|
|
|
const SourceRange R = S.getSourceRange();
|
2016-11-10 22:44:30 +08:00
|
|
|
LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
|
|
|
|
SourceLocToDebugLoc(R.getEnd()));
|
2015-01-22 16:49:35 +08:00
|
|
|
|
|
|
|
llvm::Value *BoolCondVal = nullptr;
|
2015-05-20 21:12:48 +08:00
|
|
|
if (!DynamicOrOrdered) {
|
2017-04-26 01:52:12 +08:00
|
|
|
// UB = min(UB, GlobalUB) or
|
|
|
|
// UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
|
|
|
|
// 'distribute parallel for')
|
|
|
|
EmitIgnoredExpr(LoopArgs.EUB);
|
2015-03-12 21:37:50 +08:00
|
|
|
// IV = LB
|
2017-04-26 01:52:12 +08:00
|
|
|
EmitIgnoredExpr(LoopArgs.Init);
|
2015-03-12 21:37:50 +08:00
|
|
|
// IV < UB
|
2017-04-26 01:52:12 +08:00
|
|
|
BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
|
2015-03-12 21:37:50 +08:00
|
|
|
} else {
|
2017-04-26 01:52:12 +08:00
|
|
|
BoolCondVal =
|
2018-08-10 05:08:08 +08:00
|
|
|
RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL,
|
2017-04-26 01:52:12 +08:00
|
|
|
LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
|
2015-03-12 21:37:50 +08:00
|
|
|
}
|
2015-01-22 16:49:35 +08:00
|
|
|
|
|
|
|
// If there are any cleanups between here and the loop-exit scope,
|
|
|
|
// create a block to stage a loop exit along.
|
2018-04-14 01:31:06 +08:00
|
|
|
llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
|
2015-01-22 16:49:35 +08:00
|
|
|
if (LoopScope.requiresCleanups())
|
|
|
|
ExitBlock = createBasicBlock("omp.dispatch.cleanup");
|
|
|
|
|
2018-04-14 01:31:06 +08:00
|
|
|
llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body");
|
2015-01-22 16:49:35 +08:00
|
|
|
Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
|
|
|
|
if (ExitBlock != LoopExit.getBlock()) {
|
|
|
|
EmitBlock(ExitBlock);
|
|
|
|
EmitBranchThroughCleanup(LoopExit);
|
|
|
|
}
|
|
|
|
EmitBlock(LoopBody);
|
|
|
|
|
2015-03-12 21:37:50 +08:00
|
|
|
// Emit "IV = LB" (in case of static schedule, we have already calculated new
|
|
|
|
// LB for loop condition and emitted it above).
|
2015-05-20 21:12:48 +08:00
|
|
|
if (DynamicOrOrdered)
|
2017-04-26 01:52:12 +08:00
|
|
|
EmitIgnoredExpr(LoopArgs.Init);
|
2015-03-12 21:37:50 +08:00
|
|
|
|
2015-01-22 16:49:35 +08:00
|
|
|
// Create a block for the increment.
|
2018-04-14 01:31:06 +08:00
|
|
|
JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
|
2015-01-22 16:49:35 +08:00
|
|
|
BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
|
|
|
|
|
2015-06-18 12:45:29 +08:00
|
|
|
// Generate !llvm.loop.parallel metadata for loads and stores for loops
|
|
|
|
// with dynamic/guided scheduling and without ordered clause.
|
2015-12-31 14:52:34 +08:00
|
|
|
if (!isOpenMPSimdDirective(S.getDirectiveKind()))
|
|
|
|
LoopStack.setParallel(!IsMonotonic);
|
|
|
|
else
|
|
|
|
EmitOMPSimdInit(S, IsMonotonic);
|
2015-06-18 12:45:29 +08:00
|
|
|
|
2018-08-10 05:08:08 +08:00
|
|
|
SourceLocation Loc = S.getBeginLoc();
|
2017-04-26 01:52:12 +08:00
|
|
|
|
|
|
|
// when 'distribute' is not combined with a 'for':
|
|
|
|
// while (idx <= UB) { BODY; ++idx; }
|
|
|
|
// when 'distribute' is combined with a 'for'
|
|
|
|
// (e.g. 'distribute parallel for')
|
|
|
|
// while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
|
|
|
|
EmitOMPInnerLoop(
|
|
|
|
S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
|
|
|
|
[&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
|
|
|
|
CodeGenLoop(CGF, S, LoopExit);
|
|
|
|
},
|
|
|
|
[IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
|
|
|
|
CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
|
|
|
|
});
|
2015-01-22 16:49:35 +08:00
|
|
|
|
|
|
|
EmitBlock(Continue.getBlock());
|
|
|
|
BreakContinueStack.pop_back();
|
2015-05-20 21:12:48 +08:00
|
|
|
if (!DynamicOrOrdered) {
|
2015-03-12 21:37:50 +08:00
|
|
|
// Emit "LB = LB + Stride", "UB = UB + Stride".
|
2017-04-26 01:52:12 +08:00
|
|
|
EmitIgnoredExpr(LoopArgs.NextLB);
|
|
|
|
EmitIgnoredExpr(LoopArgs.NextUB);
|
2015-03-12 21:37:50 +08:00
|
|
|
}
|
2015-01-22 16:49:35 +08:00
|
|
|
|
|
|
|
EmitBranch(CondBlock);
|
|
|
|
LoopStack.pop();
|
|
|
|
// Emit the fall-through block.
|
|
|
|
EmitBlock(LoopExit.getBlock());
|
|
|
|
|
|
|
|
// Tell the runtime we are done.
|
2016-11-17 23:12:05 +08:00
|
|
|
auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
|
|
|
|
if (!DynamicOrOrdered)
|
2018-08-10 05:09:38 +08:00
|
|
|
CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
|
2017-09-07 00:17:35 +08:00
|
|
|
S.getDirectiveKind());
|
2016-11-17 23:12:05 +08:00
|
|
|
};
|
|
|
|
OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
|
2016-03-08 00:04:49 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void CodeGenFunction::EmitOMPForOuterLoop(
|
2016-05-10 17:57:36 +08:00
|
|
|
const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
|
2016-03-08 00:04:49 +08:00
|
|
|
const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
|
2017-04-26 01:52:12 +08:00
|
|
|
const OMPLoopArguments &LoopArgs,
|
|
|
|
const CodeGenDispatchBoundsTy &CGDispatchBounds) {
|
2018-04-14 01:31:06 +08:00
|
|
|
CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
|
2016-03-08 00:04:49 +08:00
|
|
|
|
|
|
|
// Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
|
2016-05-10 17:57:36 +08:00
|
|
|
const bool DynamicOrOrdered =
|
|
|
|
Ordered || RT.isDynamic(ScheduleKind.Schedule);
|
2016-03-08 00:04:49 +08:00
|
|
|
|
|
|
|
assert((Ordered ||
|
2016-05-10 17:57:36 +08:00
|
|
|
!RT.isStaticNonchunked(ScheduleKind.Schedule,
|
2017-04-26 01:52:12 +08:00
|
|
|
LoopArgs.Chunk != nullptr)) &&
|
2016-03-08 00:04:49 +08:00
|
|
|
"static non-chunked schedule does not need outer loop");
|
|
|
|
|
|
|
|
// Emit outer loop.
|
|
|
|
//
|
|
|
|
// OpenMP [2.7.1, Loop Construct, Description, table 2-1]
|
|
|
|
// When schedule(dynamic,chunk_size) is specified, the iterations are
|
|
|
|
// distributed to threads in the team in chunks as the threads request them.
|
|
|
|
// Each thread executes a chunk of iterations, then requests another chunk,
|
|
|
|
// until no chunks remain to be distributed. Each chunk contains chunk_size
|
|
|
|
// iterations, except for the last chunk to be distributed, which may have
|
|
|
|
// fewer iterations. When no chunk_size is specified, it defaults to 1.
|
|
|
|
//
|
|
|
|
// When schedule(guided,chunk_size) is specified, the iterations are assigned
|
|
|
|
// to threads in the team in chunks as the executing threads request them.
|
|
|
|
// Each thread executes a chunk of iterations, then requests another chunk,
|
|
|
|
// until no chunks remain to be assigned. For a chunk_size of 1, the size of
|
|
|
|
// each chunk is proportional to the number of unassigned iterations divided
|
|
|
|
// by the number of threads in the team, decreasing to 1. For a chunk_size
|
|
|
|
// with value k (greater than 1), the size of each chunk is determined in the
|
|
|
|
// same way, with the restriction that the chunks do not contain fewer than k
|
|
|
|
// iterations (except for the last chunk to be assigned, which may have fewer
|
|
|
|
// than k iterations).
|
|
|
|
//
|
|
|
|
// When schedule(auto) is specified, the decision regarding scheduling is
|
|
|
|
// delegated to the compiler and/or runtime system. The programmer gives the
|
|
|
|
// implementation the freedom to choose any possible mapping of iterations to
|
|
|
|
// threads in the team.
|
|
|
|
//
|
|
|
|
// When schedule(runtime) is specified, the decision regarding scheduling is
|
|
|
|
// deferred until run time, and the schedule and chunk size are taken from the
|
|
|
|
// run-sched-var ICV. If the ICV is set to auto, the schedule is
|
|
|
|
// implementation defined
|
|
|
|
//
|
|
|
|
// while(__kmpc_dispatch_next(&LB, &UB)) {
|
|
|
|
// idx = LB;
|
|
|
|
// while (idx <= UB) { BODY; ++idx;
|
|
|
|
// __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
|
|
|
|
// } // inner loop
|
|
|
|
// }
|
|
|
|
//
|
|
|
|
// OpenMP [2.7.1, Loop Construct, Description, table 2-1]
|
|
|
|
// When schedule(static, chunk_size) is specified, iterations are divided into
|
|
|
|
// chunks of size chunk_size, and the chunks are assigned to the threads in
|
|
|
|
// the team in a round-robin fashion in the order of the thread number.
|
|
|
|
//
|
|
|
|
// while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
|
|
|
|
// while (idx <= UB) { BODY; ++idx; } // inner loop
|
|
|
|
// LB = LB + ST;
|
|
|
|
// UB = UB + ST;
|
|
|
|
// }
|
|
|
|
//
|
|
|
|
|
|
|
|
const Expr *IVExpr = S.getIterationVariable();
|
|
|
|
const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
|
|
|
|
const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
|
|
|
|
|
|
|
|
if (DynamicOrOrdered) {
|
2018-04-14 01:31:06 +08:00
|
|
|
const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
|
|
|
|
CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
|
2017-04-26 01:52:12 +08:00
|
|
|
llvm::Value *LBVal = DispatchBounds.first;
|
|
|
|
llvm::Value *UBVal = DispatchBounds.second;
|
|
|
|
CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
|
|
|
|
LoopArgs.Chunk};
|
2018-08-10 05:08:08 +08:00
|
|
|
RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize,
|
2017-04-26 01:52:12 +08:00
|
|
|
IVSigned, Ordered, DipatchRTInputValues);
|
2016-03-08 00:04:49 +08:00
|
|
|
} else {
|
2017-08-15 01:56:13 +08:00
|
|
|
CGOpenMPRuntime::StaticRTInput StaticInit(
|
|
|
|
IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
|
|
|
|
LoopArgs.ST, LoopArgs.Chunk);
|
2018-08-10 05:08:08 +08:00
|
|
|
RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
|
2017-08-15 01:56:13 +08:00
|
|
|
ScheduleKind, StaticInit);
|
2016-03-08 00:04:49 +08:00
|
|
|
}
|
|
|
|
|
2017-04-26 01:52:12 +08:00
|
|
|
auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
|
|
|
|
const unsigned IVSize,
|
|
|
|
const bool IVSigned) {
|
|
|
|
if (Ordered) {
|
|
|
|
CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
|
|
|
|
IVSigned);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
|
|
|
|
LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
|
|
|
|
OuterLoopArgs.IncExpr = S.getInc();
|
|
|
|
OuterLoopArgs.Init = S.getInit();
|
|
|
|
OuterLoopArgs.Cond = S.getCond();
|
|
|
|
OuterLoopArgs.NextLB = S.getNextLowerBound();
|
|
|
|
OuterLoopArgs.NextUB = S.getNextUpperBound();
|
|
|
|
EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
|
|
|
|
emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
|
2016-03-08 00:04:49 +08:00
|
|
|
}
|
|
|
|
|
2017-04-26 01:52:12 +08:00
|
|
|
/// No-op counterpart of an "ordered iteration end" emitter. It accepts the
/// same arguments (function being emitted, location, iteration variable
/// size and signedness) and deliberately emits nothing.
static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
                             const unsigned IVSize, const bool IVSigned) {}
|
|
|
|
|
2016-03-08 00:04:49 +08:00
|
|
|
void CodeGenFunction::EmitOMPDistributeOuterLoop(
|
2017-04-26 01:52:12 +08:00
|
|
|
OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
|
|
|
|
OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
|
|
|
|
const CodeGenLoopTy &CodeGenLoopContent) {
|
2016-03-08 00:04:49 +08:00
|
|
|
|
2018-04-14 01:31:06 +08:00
|
|
|
CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
|
2016-03-08 00:04:49 +08:00
|
|
|
|
|
|
|
// Emit outer loop.
|
|
|
|
// Same behavior as a OMPForOuterLoop, except that schedule cannot be
|
|
|
|
// dynamic
|
|
|
|
//
|
|
|
|
|
|
|
|
const Expr *IVExpr = S.getIterationVariable();
|
|
|
|
const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
|
|
|
|
const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
|
|
|
|
|
2017-08-15 01:56:13 +08:00
|
|
|
CGOpenMPRuntime::StaticRTInput StaticInit(
|
|
|
|
IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
|
|
|
|
LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
|
2018-08-10 05:08:08 +08:00
|
|
|
RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);
|
2017-04-26 01:52:12 +08:00
|
|
|
|
|
|
|
// for combined 'distribute' and 'for' the increment expression of distribute
|
2018-10-29 23:45:47 +08:00
|
|
|
// is stored in DistInc. For 'distribute' alone, it is in Inc.
|
2017-04-26 01:52:12 +08:00
|
|
|
Expr *IncExpr;
|
|
|
|
if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
|
|
|
|
IncExpr = S.getDistInc();
|
|
|
|
else
|
|
|
|
IncExpr = S.getInc();
|
|
|
|
|
|
|
|
// this routine is shared by 'omp distribute parallel for' and
|
|
|
|
// 'omp distribute': select the right EUB expression depending on the
|
|
|
|
// directive
|
|
|
|
OMPLoopArguments OuterLoopArgs;
|
|
|
|
OuterLoopArgs.LB = LoopArgs.LB;
|
|
|
|
OuterLoopArgs.UB = LoopArgs.UB;
|
|
|
|
OuterLoopArgs.ST = LoopArgs.ST;
|
|
|
|
OuterLoopArgs.IL = LoopArgs.IL;
|
|
|
|
OuterLoopArgs.Chunk = LoopArgs.Chunk;
|
|
|
|
OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
|
|
|
|
? S.getCombinedEnsureUpperBound()
|
|
|
|
: S.getEnsureUpperBound();
|
|
|
|
OuterLoopArgs.IncExpr = IncExpr;
|
|
|
|
OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
|
|
|
|
? S.getCombinedInit()
|
|
|
|
: S.getInit();
|
|
|
|
OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
|
|
|
|
? S.getCombinedCond()
|
|
|
|
: S.getCond();
|
|
|
|
OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
|
|
|
|
? S.getCombinedNextLowerBound()
|
|
|
|
: S.getNextLowerBound();
|
|
|
|
OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
|
|
|
|
? S.getCombinedNextUpperBound()
|
|
|
|
: S.getNextUpperBound();
|
|
|
|
|
|
|
|
EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
|
|
|
|
LoopScope, OuterLoopArgs, CodeGenLoopContent,
|
|
|
|
emitEmptyOrdered);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Emit the lower/upper bound helper variables of the inner 'for' of a
/// combined 'distribute parallel for' and seed them from the bounds of the
/// enclosing 'distribute' chunk.
///
/// \returns the (LB, UB) lvalues of the inner loop.
static std::pair<LValue, LValue>
emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &S) {
  const auto &LoopDir = cast<OMPLoopDirective>(S);
  LValue InnerLB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LoopDir.getLowerBoundVariable()));
  LValue InnerUB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LoopDir.getUpperBoundVariable()));

  // When composing 'distribute' with 'for' (e.g. as in 'distribute
  // parallel for') we need to use the 'distribute' chunk lower and upper
  // bounds rather than the whole loop iteration space. These are parameters
  // to the outlined function for 'parallel' and we copy the bounds of the
  // previous schedule into the current ones.
  LValue PrevLB = CGF.EmitLValue(LoopDir.getPrevLowerBoundVariable());
  LValue PrevUB = CGF.EmitLValue(LoopDir.getPrevUpperBoundVariable());

  // Load a previous-schedule bound and convert it to the type of the
  // iteration variable.
  auto LoadAsIVType = [&CGF, &LoopDir](LValue Bound, const Expr *BoundVar) {
    llvm::Value *Loaded = CGF.EmitLoadOfScalar(Bound, BoundVar->getExprLoc());
    return CGF.EmitScalarConversion(Loaded, BoundVar->getType(),
                                    LoopDir.getIterationVariable()->getType(),
                                    BoundVar->getExprLoc());
  };
  llvm::Value *PrevLBVal =
      LoadAsIVType(PrevLB, LoopDir.getPrevLowerBoundVariable());
  llvm::Value *PrevUBVal =
      LoadAsIVType(PrevUB, LoopDir.getPrevUpperBoundVariable());

  CGF.EmitStoreOfScalar(PrevLBVal, InnerLB);
  CGF.EmitStoreOfScalar(PrevUBVal, InnerUB);

  return {InnerLB, InnerUB};
}
|
2016-03-08 00:04:49 +08:00
|
|
|
|
2017-04-26 01:52:12 +08:00
|
|
|
/// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
|
|
|
|
/// we need to use the LB and UB expressions generated by the worksharing
|
|
|
|
/// code generation support, whereas in non combined situations we would
|
|
|
|
/// just emit 0 and the LastIteration expression
|
|
|
|
/// This function is necessary due to the difference of the LB and UB
|
|
|
|
/// types for the RT emission routines for 'for_static_init' and
|
|
|
|
/// 'for_dispatch_init'
|
|
|
|
static std::pair<llvm::Value *, llvm::Value *>
|
|
|
|
emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
|
|
|
|
const OMPExecutableDirective &S,
|
|
|
|
Address LB, Address UB) {
|
|
|
|
const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
|
|
|
|
const Expr *IVExpr = LS.getIterationVariable();
|
|
|
|
// when implementing a dynamic schedule for a 'for' combined with a
|
|
|
|
// 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
|
|
|
|
// is not normalized as each team only executes its own assigned
|
|
|
|
// distribute chunk
|
|
|
|
QualType IteratorTy = IVExpr->getType();
|
2018-01-24 02:12:38 +08:00
|
|
|
llvm::Value *LBVal =
|
2018-08-10 05:08:08 +08:00
|
|
|
CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
|
2018-01-24 02:12:38 +08:00
|
|
|
llvm::Value *UBVal =
|
2018-08-10 05:08:08 +08:00
|
|
|
CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
|
2017-04-26 01:52:12 +08:00
|
|
|
return {LBVal, UBVal};
|
2017-04-28 01:02:25 +08:00
|
|
|
}
|
2017-04-26 01:52:12 +08:00
|
|
|
|
|
|
|
/// Append the combined lower and upper bound of the loop directive \p S to
/// \p CapturedVars, each loaded and integer-cast (unsigned) to CGF.SizeTy.
/// The lower bound is appended first, then the upper bound.
static void emitDistributeParallelForDistributeInnerBoundParams(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const auto &LoopDir = cast<OMPLoopDirective>(S);

  // Load one bound variable, widen/truncate it to size_t and record it.
  auto CaptureBound = [&CGF, &CapturedVars](const Expr *BoundVar) {
    LValue Bound = CGF.EmitLValue(cast<DeclRefExpr>(BoundVar));
    llvm::Value *Loaded = CGF.Builder.CreateLoad(Bound.getAddress());
    llvm::Value *AsSizeT =
        CGF.Builder.CreateIntCast(Loaded, CGF.SizeTy, /*isSigned=*/false);
    CapturedVars.push_back(AsSizeT);
  };

  CaptureBound(LoopDir.getCombinedLowerBoundVariable());
  CaptureBound(LoopDir.getCombinedUpperBoundVariable());
}
|
2017-04-26 01:52:12 +08:00
|
|
|
|
|
|
|
static void
|
|
|
|
emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
|
|
|
|
const OMPLoopDirective &S,
|
|
|
|
CodeGenFunction::JumpDest LoopExit) {
|
|
|
|
auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
|
2018-03-16 02:10:54 +08:00
|
|
|
PrePostActionTy &Action) {
|
|
|
|
Action.Enter(CGF);
|
2017-11-23 04:19:50 +08:00
|
|
|
bool HasCancel = false;
|
|
|
|
if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
|
|
|
|
if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
|
|
|
|
HasCancel = D->hasCancel();
|
|
|
|
else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
|
|
|
|
HasCancel = D->hasCancel();
|
2017-11-23 05:12:03 +08:00
|
|
|
else if (const auto *D =
|
|
|
|
dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
|
|
|
|
HasCancel = D->hasCancel();
|
2017-11-23 04:19:50 +08:00
|
|
|
}
|
|
|
|
CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
|
|
|
|
HasCancel);
|
2017-04-26 01:52:12 +08:00
|
|
|
CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
|
|
|
|
emitDistributeParallelForInnerBounds,
|
|
|
|
emitDistributeParallelForDispatchBounds);
|
|
|
|
};
|
|
|
|
|
|
|
|
emitCommonOMPParallelDirective(
|
2017-11-23 04:19:50 +08:00
|
|
|
CGF, S,
|
|
|
|
isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
|
|
|
|
CGInlinedWorksharingLoop,
|
2017-04-26 01:52:12 +08:00
|
|
|
emitDistributeParallelForDistributeInnerBoundParams);
|
2015-01-22 16:49:35 +08:00
|
|
|
}
|
|
|
|
|
2016-06-27 22:55:37 +08:00
|
|
|
void CodeGenFunction::EmitOMPDistributeParallelForDirective(
|
|
|
|
const OMPDistributeParallelForDirective &S) {
|
2017-04-26 01:52:12 +08:00
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
|
|
|
|
CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
|
|
|
|
S.getDistInc());
|
|
|
|
};
|
2018-01-13 03:39:11 +08:00
|
|
|
OMPLexicalScope Scope(*this, S, OMPD_parallel);
|
2017-11-28 00:54:08 +08:00
|
|
|
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
|
2016-06-27 22:55:37 +08:00
|
|
|
}
|
|
|
|
|
2016-07-05 13:00:15 +08:00
|
|
|
void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
|
|
|
|
const OMPDistributeParallelForSimdDirective &S) {
|
2017-11-28 03:38:58 +08:00
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
|
|
|
|
CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
|
|
|
|
S.getDistInc());
|
|
|
|
};
|
2018-01-13 03:39:11 +08:00
|
|
|
OMPLexicalScope Scope(*this, S, OMPD_parallel);
|
2017-11-28 03:38:58 +08:00
|
|
|
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
|
2016-07-05 13:00:15 +08:00
|
|
|
}
|
2016-07-06 12:45:38 +08:00
|
|
|
|
|
|
|
void CodeGenFunction::EmitOMPDistributeSimdDirective(
|
|
|
|
const OMPDistributeSimdDirective &S) {
|
2017-12-04 23:38:33 +08:00
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
|
|
|
|
CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
|
|
|
|
};
|
2018-01-13 03:39:11 +08:00
|
|
|
OMPLexicalScope Scope(*this, S, OMPD_unknown);
|
2017-12-04 23:38:33 +08:00
|
|
|
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
|
2016-07-06 12:45:38 +08:00
|
|
|
}
|
|
|
|
|
2017-11-18 01:57:25 +08:00
|
|
|
/// Emit the device-side outlined function for a 'target simd' directive.
///
/// \param CGM        Module the outlined function is emitted into.
/// \param ParentName Name passed through to the runtime's outlining routine.
/// \param S          The 'target simd' directive to emit.
void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
  // Emit the target simd region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  // Out-parameters of emitTargetOutlinedFunction. Start them at nullptr so
  // that the assertion below tests a well-defined value even if the callee
  // leaves one of them untouched.
  llvm::Function *Fn = nullptr;
  llvm::Constant *Addr = nullptr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}
|
|
|
|
|
2016-07-21 06:57:10 +08:00
|
|
|
void CodeGenFunction::EmitOMPTargetSimdDirective(
|
|
|
|
const OMPTargetSimdDirective &S) {
|
2017-11-18 01:57:25 +08:00
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
|
|
|
|
emitOMPSimdRegion(CGF, S, Action);
|
|
|
|
};
|
|
|
|
emitCommonOMPTargetDirective(*this, S, CodeGen);
|
2016-07-21 06:57:10 +08:00
|
|
|
}
|
|
|
|
|
2015-12-31 14:52:34 +08:00
|
|
|
namespace {
|
|
|
|
struct ScheduleKindModifiersTy {
|
|
|
|
OpenMPScheduleClauseKind Kind;
|
|
|
|
OpenMPScheduleClauseModifier M1;
|
|
|
|
OpenMPScheduleClauseModifier M2;
|
|
|
|
ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
|
|
|
|
OpenMPScheduleClauseModifier M1,
|
|
|
|
OpenMPScheduleClauseModifier M2)
|
|
|
|
: Kind(Kind), M1(M1), M2(M2) {}
|
|
|
|
};
|
|
|
|
} // namespace
|
|
|
|
|
2017-04-26 01:52:12 +08:00
|
|
|
bool CodeGenFunction::EmitOMPWorksharingLoop(
|
|
|
|
const OMPLoopDirective &S, Expr *EUB,
|
|
|
|
const CodeGenLoopBoundsTy &CodeGenLoopBounds,
|
|
|
|
const CodeGenDispatchBoundsTy &CGDispatchBounds) {
|
2014-12-15 15:07:06 +08:00
|
|
|
// Emit the loop iteration variable.
|
2018-04-14 01:31:06 +08:00
|
|
|
const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
|
|
|
|
const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
|
2014-12-15 15:07:06 +08:00
|
|
|
EmitVarDecl(*IVDecl);
|
|
|
|
|
|
|
|
// Emit the iterations count variable.
|
|
|
|
// If it is not a variable, Sema decided to calculate iterations count on each
|
|
|
|
// iteration (e.g., it is foldable into a constant).
|
2018-04-14 01:31:06 +08:00
|
|
|
if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
|
2014-12-15 15:07:06 +08:00
|
|
|
EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
|
|
|
|
// Emit calculation of the iterations count.
|
|
|
|
EmitIgnoredExpr(S.getCalcLastIteration());
|
|
|
|
}
|
|
|
|
|
2018-04-14 01:31:06 +08:00
|
|
|
CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
|
2014-12-15 15:07:06 +08:00
|
|
|
|
2015-04-16 12:54:05 +08:00
|
|
|
bool HasLastprivateClause;
|
2014-12-15 15:07:06 +08:00
|
|
|
// Check pre-condition.
|
|
|
|
{
|
2016-03-29 16:58:54 +08:00
|
|
|
OMPLoopScope PreInitScope(*this, S);
|
2014-12-15 15:07:06 +08:00
|
|
|
// Skip the entire loop if we don't meet the precondition.
|
2015-04-22 19:59:37 +08:00
|
|
|
// If the condition constant folds and can be elided, avoid emitting the
|
|
|
|
// whole loop.
|
|
|
|
bool CondConstant;
|
|
|
|
llvm::BasicBlock *ContBlock = nullptr;
|
|
|
|
if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
|
|
|
|
if (!CondConstant)
|
|
|
|
return false;
|
|
|
|
} else {
|
2018-04-14 01:31:06 +08:00
|
|
|
llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
|
2015-04-22 19:59:37 +08:00
|
|
|
ContBlock = createBasicBlock("omp.precond.end");
|
|
|
|
emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
|
2015-04-24 07:06:47 +08:00
|
|
|
getProfileCount(&S));
|
2015-04-22 19:59:37 +08:00
|
|
|
EmitBlock(ThenBlock);
|
2015-04-24 07:06:47 +08:00
|
|
|
incrementProfileCounter(&S);
|
2015-04-22 19:59:37 +08:00
|
|
|
}
|
2015-06-18 12:45:29 +08:00
|
|
|
|
2018-02-16 07:39:43 +08:00
|
|
|
RunCleanupsScope DoacrossCleanupScope(*this);
|
2016-05-25 20:36:08 +08:00
|
|
|
bool Ordered = false;
|
2018-04-14 01:31:06 +08:00
|
|
|
if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
|
2016-05-25 20:36:08 +08:00
|
|
|
if (OrderedClause->getNumForLoops())
|
2018-08-14 03:04:24 +08:00
|
|
|
RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations());
|
2016-05-25 20:36:08 +08:00
|
|
|
else
|
|
|
|
Ordered = true;
|
|
|
|
}
|
|
|
|
|
2016-04-22 11:56:56 +08:00
|
|
|
llvm::DenseSet<const Expr *> EmittedFinals;
|
2015-06-18 12:45:29 +08:00
|
|
|
emitAlignedClause(*this, S);
|
2017-08-16 23:58:46 +08:00
|
|
|
bool HasLinears = EmitOMPLinearClauseInit(S);
|
2016-03-09 17:49:09 +08:00
|
|
|
// Emit helper vars inits.
|
2017-04-26 01:52:12 +08:00
|
|
|
|
|
|
|
std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
|
|
|
|
LValue LB = Bounds.first;
|
|
|
|
LValue UB = Bounds.second;
|
2016-03-09 17:49:09 +08:00
|
|
|
LValue ST =
|
|
|
|
EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
|
|
|
|
LValue IL =
|
|
|
|
EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
|
|
|
|
|
2014-12-15 15:07:06 +08:00
|
|
|
// Emit 'then' code.
|
|
|
|
{
|
|
|
|
OMPPrivateScope LoopScope(*this);
|
2017-08-16 23:58:46 +08:00
|
|
|
if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
|
2015-04-15 12:52:20 +08:00
|
|
|
// Emit implicit barrier to synchronize threads and avoid data races on
|
2016-02-15 16:07:17 +08:00
|
|
|
// initialization of firstprivate variables and post-update of
|
|
|
|
// lastprivate variables.
|
2015-09-15 20:52:43 +08:00
|
|
|
CGM.getOpenMPRuntime().emitBarrierCall(
|
2018-08-10 05:08:08 +08:00
|
|
|
*this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
|
2015-09-15 20:52:43 +08:00
|
|
|
/*ForceSimpleCall=*/true);
|
2015-04-15 12:52:20 +08:00
|
|
|
}
|
2015-04-22 20:24:45 +08:00
|
|
|
EmitOMPPrivateClause(S, LoopScope);
|
2015-04-16 12:54:05 +08:00
|
|
|
HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
|
[OPENMP] Codegen for 'reduction' clause in 'for' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
*(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
...
*(Type<n>-1*)lhs[<n>-1] =
ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
*(Type<n>-1*)rhs[<n>-1]);
}
...
void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
<LHSExprs>[0] = ReductionOperation0(*<LHSExprs>[0], *<RHSExprs>[0]);
...
<LHSExprs>[<n>-1] = ReductionOperation<n>-1(*<LHSExprs>[<n>-1], *<RHSExprs>[<n>-1]);
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
Atomic(<LHSExprs>[0] = ReductionOperation0(*<LHSExprs>[0], *<RHSExprs>[0]));
...
Atomic(<LHSExprs>[<n>-1] = ReductionOperation<n>-1(*<LHSExprs>[<n>-1], *<RHSExprs>[<n>-1]));
break;
default:;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D9139
llvm-svn: 235506
2015-04-22 21:43:03 +08:00
|
|
|
EmitOMPReductionClauseInit(S, LoopScope);
|
2016-04-22 11:56:56 +08:00
|
|
|
EmitOMPPrivateLoopCounters(S, LoopScope);
|
|
|
|
EmitOMPLinearClause(S, LoopScope);
|
2015-03-16 15:14:41 +08:00
|
|
|
(void)LoopScope.Privatize();
|
2018-10-30 23:50:12 +08:00
|
|
|
if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
|
|
|
|
CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
|
2014-12-15 15:07:06 +08:00
|
|
|
|
|
|
|
// Detect the loop schedule kind and chunk.
|
2018-10-29 23:45:47 +08:00
|
|
|
const Expr *ChunkExpr = nullptr;
|
2016-05-10 17:57:36 +08:00
|
|
|
OpenMPScheduleTy ScheduleKind;
|
2018-04-14 01:31:06 +08:00
|
|
|
if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
|
2016-05-10 17:57:36 +08:00
|
|
|
ScheduleKind.Schedule = C->getScheduleKind();
|
|
|
|
ScheduleKind.M1 = C->getFirstScheduleModifier();
|
|
|
|
ScheduleKind.M2 = C->getSecondScheduleModifier();
|
2018-10-29 23:45:47 +08:00
|
|
|
ChunkExpr = C->getChunkSize();
|
2018-09-28 04:29:00 +08:00
|
|
|
} else {
|
|
|
|
// Default behaviour for schedule clause.
|
|
|
|
CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
|
2018-10-29 23:45:47 +08:00
|
|
|
*this, S, ScheduleKind.Schedule, ChunkExpr);
|
|
|
|
}
|
|
|
|
bool HasChunkSizeOne = false;
|
|
|
|
llvm::Value *Chunk = nullptr;
|
|
|
|
if (ChunkExpr) {
|
|
|
|
Chunk = EmitScalarExpr(ChunkExpr);
|
|
|
|
Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(),
|
|
|
|
S.getIterationVariable()->getType(),
|
|
|
|
S.getBeginLoc());
|
2018-12-01 07:41:18 +08:00
|
|
|
Expr::EvalResult Result;
|
|
|
|
if (ChunkExpr->EvaluateAsInt(Result, getContext())) {
|
|
|
|
llvm::APSInt EvaluatedChunk = Result.Val.getInt();
|
2018-10-29 23:45:47 +08:00
|
|
|
HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
|
2018-12-01 07:41:18 +08:00
|
|
|
}
|
2016-02-16 19:18:12 +08:00
|
|
|
}
|
2014-12-15 15:07:06 +08:00
|
|
|
const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
|
|
|
|
const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
|
2015-12-31 14:52:34 +08:00
|
|
|
// OpenMP 4.5, 2.7.1 Loop Construct, Description.
|
|
|
|
// If the static schedule kind is specified or if the ordered clause is
|
|
|
|
// specified, and if no monotonic modifier is specified, the effect will
|
|
|
|
// be as if the monotonic modifier was specified.
|
2018-10-29 23:45:47 +08:00
|
|
|
bool StaticChunkedOne = RT.isStaticChunked(ScheduleKind.Schedule,
|
|
|
|
/* Chunked */ Chunk != nullptr) && HasChunkSizeOne &&
|
|
|
|
isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
|
|
|
|
if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
|
|
|
|
/* Chunked */ Chunk != nullptr) ||
|
|
|
|
StaticChunkedOne) &&
|
2015-05-20 21:12:48 +08:00
|
|
|
!Ordered) {
|
2015-12-31 14:52:34 +08:00
|
|
|
if (isOpenMPSimdDirective(S.getDirectiveKind()))
|
|
|
|
EmitOMPSimdInit(S, /*IsMonotonic=*/true);
|
2014-12-15 15:07:06 +08:00
|
|
|
// OpenMP [2.7.1, Loop Construct, Description, table 2-1]
|
|
|
|
// When no chunk_size is specified, the iteration space is divided into
|
|
|
|
// chunks that are approximately equal in size, and at most one chunk is
|
|
|
|
// distributed to each thread. Note that the size of the chunks is
|
|
|
|
// unspecified in this case.
|
2017-08-15 01:56:13 +08:00
|
|
|
CGOpenMPRuntime::StaticRTInput StaticInit(
|
|
|
|
IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
|
2018-10-29 23:45:47 +08:00
|
|
|
UB.getAddress(), ST.getAddress(),
|
|
|
|
StaticChunkedOne ? Chunk : nullptr);
|
2018-08-10 05:08:08 +08:00
|
|
|
RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
|
2017-08-15 01:56:13 +08:00
|
|
|
ScheduleKind, StaticInit);
|
2018-04-14 01:31:06 +08:00
|
|
|
JumpDest LoopExit =
|
2015-12-31 14:52:34 +08:00
|
|
|
getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
|
2014-12-15 15:07:06 +08:00
|
|
|
// UB = min(UB, GlobalUB);
|
2018-10-29 23:45:47 +08:00
|
|
|
if (!StaticChunkedOne)
|
|
|
|
EmitIgnoredExpr(S.getEnsureUpperBound());
|
2014-12-15 15:07:06 +08:00
|
|
|
// IV = LB;
|
|
|
|
EmitIgnoredExpr(S.getInit());
|
2018-10-29 23:45:47 +08:00
|
|
|
// For unchunked static schedule generate:
|
|
|
|
//
|
|
|
|
// while (idx <= UB) {
|
|
|
|
// BODY;
|
|
|
|
// ++idx;
|
|
|
|
// }
|
|
|
|
//
|
|
|
|
// For static schedule with chunk one:
|
|
|
|
//
|
|
|
|
// while (IV <= PrevUB) {
|
|
|
|
// BODY;
|
|
|
|
// IV += ST;
|
|
|
|
// }
|
|
|
|
EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
|
|
|
|
StaticChunkedOne ? S.getCombinedParForInDistCond() : S.getCond(),
|
|
|
|
StaticChunkedOne ? S.getDistInc() : S.getInc(),
|
|
|
|
[&S, LoopExit](CodeGenFunction &CGF) {
|
|
|
|
CGF.EmitOMPLoopBody(S, LoopExit);
|
|
|
|
CGF.EmitStopPoint(&S);
|
|
|
|
},
|
|
|
|
[](CodeGenFunction &) {});
|
2015-07-02 12:17:07 +08:00
|
|
|
EmitBlock(LoopExit.getBlock());
|
2014-12-15 15:07:06 +08:00
|
|
|
// Tell the runtime we are done.
|
2016-11-17 23:12:05 +08:00
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
|
2018-08-10 05:09:38 +08:00
|
|
|
CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
|
2017-09-07 00:17:35 +08:00
|
|
|
S.getDirectiveKind());
|
2016-11-17 23:12:05 +08:00
|
|
|
};
|
|
|
|
OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
|
2015-01-22 16:49:35 +08:00
|
|
|
} else {
|
2016-05-10 17:57:36 +08:00
|
|
|
const bool IsMonotonic =
|
|
|
|
Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
|
|
|
|
ScheduleKind.Schedule == OMPC_SCHEDULE_unknown ||
|
|
|
|
ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
|
|
|
|
ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
|
2015-01-22 16:49:35 +08:00
|
|
|
// Emit the outer loop, which requests its work chunk [LB..UB] from
|
|
|
|
// runtime and runs the inner loop to process it.
|
2017-04-26 01:52:12 +08:00
|
|
|
const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
|
|
|
|
ST.getAddress(), IL.getAddress(),
|
|
|
|
Chunk, EUB);
|
2015-12-31 14:52:34 +08:00
|
|
|
EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
|
2017-04-26 01:52:12 +08:00
|
|
|
LoopArguments, CGDispatchBounds);
|
2015-01-22 16:49:35 +08:00
|
|
|
}
|
2016-04-22 11:56:56 +08:00
|
|
|
if (isOpenMPSimdDirective(S.getDirectiveKind())) {
|
2018-08-10 05:08:08 +08:00
|
|
|
EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
|
|
|
|
return CGF.Builder.CreateIsNotNull(
|
|
|
|
CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
|
|
|
|
});
|
2016-04-22 11:56:56 +08:00
|
|
|
}
|
2017-02-17 00:20:16 +08:00
|
|
|
EmitOMPReductionClauseFinal(
|
|
|
|
S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
|
|
|
|
? /*Parallel and Simd*/ OMPD_parallel_for_simd
|
|
|
|
: /*Parallel only*/ OMPD_parallel);
|
2016-03-02 12:57:40 +08:00
|
|
|
// Emit post-update of the reduction variables if IsLastIter != 0.
|
|
|
|
emitPostUpdateForReductionClause(
|
2018-04-14 01:31:06 +08:00
|
|
|
*this, S, [IL, &S](CodeGenFunction &CGF) {
|
2016-03-02 12:57:40 +08:00
|
|
|
return CGF.Builder.CreateIsNotNull(
|
2018-08-10 05:08:08 +08:00
|
|
|
CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
|
2016-03-02 12:57:40 +08:00
|
|
|
});
|
2015-04-16 12:54:05 +08:00
|
|
|
// Emit final copy of the lastprivate variables if IsLastIter != 0.
|
|
|
|
if (HasLastprivateClause)
|
|
|
|
EmitOMPLastprivateClauseFinal(
|
2016-04-22 11:56:56 +08:00
|
|
|
S, isOpenMPSimdDirective(S.getDirectiveKind()),
|
2018-08-10 05:08:08 +08:00
|
|
|
Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
|
2015-06-18 12:45:29 +08:00
|
|
|
}
|
2018-04-14 01:31:06 +08:00
|
|
|
EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) {
|
2016-03-09 17:49:09 +08:00
|
|
|
return CGF.Builder.CreateIsNotNull(
|
2018-08-10 05:08:08 +08:00
|
|
|
CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
|
2016-03-09 17:49:09 +08:00
|
|
|
});
|
2018-02-16 07:39:43 +08:00
|
|
|
DoacrossCleanupScope.ForceCleanup();
|
2014-12-15 15:07:06 +08:00
|
|
|
// We're now done with the loop, so jump to the continuation block.
|
2015-04-22 19:59:37 +08:00
|
|
|
if (ContBlock) {
|
|
|
|
EmitBranch(ContBlock);
|
2018-04-14 01:31:06 +08:00
|
|
|
EmitBlock(ContBlock, /*IsFinished=*/true);
|
2015-04-22 19:59:37 +08:00
|
|
|
}
|
2014-12-15 15:07:06 +08:00
|
|
|
}
|
2015-04-16 12:54:05 +08:00
|
|
|
return HasLastprivateClause;
|
2014-12-15 15:07:06 +08:00
|
|
|
}
|
|
|
|
|
2017-04-26 01:52:12 +08:00
|
|
|
/// The following two functions generate expressions for the loop lower
|
|
|
|
/// and upper bounds in case of static and dynamic (dispatch) schedule
|
|
|
|
/// of the associated 'for' or 'distribute' loop.
|
|
|
|
static std::pair<LValue, LValue>
|
|
|
|
emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
|
2018-04-14 01:31:06 +08:00
|
|
|
const auto &LS = cast<OMPLoopDirective>(S);
|
2017-04-26 01:52:12 +08:00
|
|
|
LValue LB =
|
|
|
|
EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
|
|
|
|
LValue UB =
|
|
|
|
EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
|
|
|
|
return {LB, UB};
|
|
|
|
}
|
|
|
|
|
|
|
|
/// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
|
|
|
|
/// consider the lower and upper bound expressions generated by the
|
|
|
|
/// worksharing loop support, but we use 0 and the iteration space size as
|
|
|
|
/// constants
|
|
|
|
static std::pair<llvm::Value *, llvm::Value *>
|
|
|
|
emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
|
|
|
|
Address LB, Address UB) {
|
2018-04-14 01:31:06 +08:00
|
|
|
const auto &LS = cast<OMPLoopDirective>(S);
|
2017-04-26 01:52:12 +08:00
|
|
|
const Expr *IVExpr = LS.getIterationVariable();
|
|
|
|
const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
|
|
|
|
llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
|
|
|
|
llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
|
|
|
|
return {LBVal, UBVal};
|
|
|
|
}
|
|
|
|
|
2014-12-15 15:07:06 +08:00
|
|
|
void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
|
2015-04-16 12:54:05 +08:00
|
|
|
bool HasLastprivates = false;
|
2016-03-29 13:34:15 +08:00
|
|
|
auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
|
|
|
|
PrePostActionTy &) {
|
2016-11-17 23:12:05 +08:00
|
|
|
OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
|
2017-04-26 01:52:12 +08:00
|
|
|
HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
|
|
|
|
emitForLoopBounds,
|
|
|
|
emitDispatchForLoopBounds);
|
2016-03-29 13:34:15 +08:00
|
|
|
};
|
2016-02-16 19:18:12 +08:00
|
|
|
{
|
2018-01-13 03:39:11 +08:00
|
|
|
OMPLexicalScope Scope(*this, S, OMPD_unknown);
|
2016-02-16 19:18:12 +08:00
|
|
|
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
|
|
|
|
S.hasCancel());
|
|
|
|
}
|
2014-12-15 15:07:06 +08:00
|
|
|
|
|
|
|
// Emit an implicit barrier at the end.
|
2018-04-14 01:31:06 +08:00
|
|
|
if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
|
2018-08-10 05:08:08 +08:00
|
|
|
CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
|
2014-06-18 12:14:57 +08:00
|
|
|
}
|
2014-06-25 19:44:49 +08:00
|
|
|
|
2015-06-17 15:45:51 +08:00
|
|
|
void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
|
|
|
|
bool HasLastprivates = false;
|
2016-03-29 13:34:15 +08:00
|
|
|
auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
|
|
|
|
PrePostActionTy &) {
|
2017-04-26 01:52:12 +08:00
|
|
|
HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
|
|
|
|
emitForLoopBounds,
|
|
|
|
emitDispatchForLoopBounds);
|
2016-03-29 13:34:15 +08:00
|
|
|
};
|
2016-02-16 19:18:12 +08:00
|
|
|
{
|
2018-01-13 03:39:11 +08:00
|
|
|
OMPLexicalScope Scope(*this, S, OMPD_unknown);
|
2016-02-16 19:18:12 +08:00
|
|
|
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
|
|
|
|
}
|
2015-06-17 15:45:51 +08:00
|
|
|
|
|
|
|
// Emit an implicit barrier at the end.
|
2018-04-14 01:31:06 +08:00
|
|
|
if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
|
2018-08-10 05:08:08 +08:00
|
|
|
CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
|
2014-09-18 13:12:34 +08:00
|
|
|
}
|
|
|
|
|
[OPENMP] Initial codegen for 'omp sections' and 'omp section' directives.
If only one section is found in the sections region, it is emitted just like single region.
Otherwise it is emitted as a static non-chunked loop.
#pragma omp sections
{
#pragma omp section
{1}
...
#pragma omp section
{n}
}
is translated to something like
i32 <iter_var>
i32 <last_iter> = 0
i32 <lower_bound> = 0
i32 <upper_bound> = n-1
i32 <stride> = 1
call void @__kmpc_for_static_init_4(<loc>, i32 <gtid>, i32 34/*static non-chunked*/, i32* <last_iter>, i32* <lower_bound>, i32* <upper_bound>, i32* <stride>, i32 1/*increment always 1*/, i32 1/*chunk always 1*/)
<upper_bound> = min(<upper_bound>, n-1)
<iter_var> = <lb>
check:
br <iter_var> <= <upper_bound>, label cont, label exit
continue:
switch (IV) {
case 0:
{1};
break;
...
case <NumSection> - 1:
{n};
break;
}
++<iter_var>
br label check
exit:
call void @__kmpc_for_static_fini(<loc>, i32 <gtid>)
Differential Revision: http://reviews.llvm.org/D8244
llvm-svn: 232021
2015-03-12 16:53:29 +08:00
|
|
|
/// Create a memory temporary of type \p Ty named \p Name and wrap it in an
/// LValue. When \p Init is non-null it is stored into the temporary as its
/// initial value.
static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
                                const Twine &Name,
                                llvm::Value *Init = nullptr) {
  Address Slot = CGF.CreateMemTemp(Ty, Name);
  LValue Result = CGF.MakeAddrLValue(Slot, Ty);
  if (Init != nullptr) {
    // Store the initializer through the freshly created lvalue.
    CGF.EmitStoreThroughLValue(RValue::get(Init), Result, /*isInit*/ true);
  }
  return Result;
}
|
|
|
|
|
2016-02-16 19:18:12 +08:00
|
|
|
void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
|
2018-04-14 01:31:06 +08:00
|
|
|
const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
|
|
|
|
const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
|
2016-01-22 16:56:50 +08:00
|
|
|
bool HasLastprivates = false;
|
2018-04-14 01:31:06 +08:00
|
|
|
auto &&CodeGen = [&S, CapturedStmt, CS,
|
|
|
|
&HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) {
|
|
|
|
ASTContext &C = CGF.getContext();
|
|
|
|
QualType KmpInt32Ty =
|
|
|
|
C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
|
2016-01-22 16:56:50 +08:00
|
|
|
// Emit helper vars inits.
|
|
|
|
LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
|
|
|
|
CGF.Builder.getInt32(0));
|
2018-04-14 01:31:06 +08:00
|
|
|
llvm::ConstantInt *GlobalUBVal = CS != nullptr
|
|
|
|
? CGF.Builder.getInt32(CS->size() - 1)
|
|
|
|
: CGF.Builder.getInt32(0);
|
2016-01-22 16:56:50 +08:00
|
|
|
LValue UB =
|
|
|
|
createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
|
|
|
|
LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
|
|
|
|
CGF.Builder.getInt32(1));
|
|
|
|
LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
|
|
|
|
CGF.Builder.getInt32(0));
|
|
|
|
// Loop counter.
|
|
|
|
LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
|
2018-08-10 05:08:08 +08:00
|
|
|
OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
|
2016-01-22 16:56:50 +08:00
|
|
|
CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
|
2018-08-10 05:08:08 +08:00
|
|
|
OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
|
2016-01-22 16:56:50 +08:00
|
|
|
CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
|
|
|
|
// Generate condition for loop.
|
|
|
|
BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
|
2018-08-10 05:08:08 +08:00
|
|
|
OK_Ordinary, S.getBeginLoc(), FPOptions());
|
2016-01-22 16:56:50 +08:00
|
|
|
// Increment for loop counter.
|
|
|
|
UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
|
2018-08-10 05:08:08 +08:00
|
|
|
S.getBeginLoc(), true);
|
2018-04-14 01:31:06 +08:00
|
|
|
auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
|
2016-01-22 16:56:50 +08:00
|
|
|
// Iterate through all sections and emit a switch construct:
|
|
|
|
// switch (IV) {
|
|
|
|
// case 0:
|
|
|
|
// <SectionStmt[0]>;
|
|
|
|
// break;
|
|
|
|
// ...
|
|
|
|
// case <NumSection> - 1:
|
|
|
|
// <SectionStmt[<NumSection> - 1]>;
|
|
|
|
// break;
|
|
|
|
// }
|
|
|
|
// .omp.sections.exit:
|
2018-04-14 01:31:06 +08:00
|
|
|
llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
|
|
|
|
llvm::SwitchInst *SwitchStmt =
|
2018-08-10 05:08:08 +08:00
|
|
|
CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()),
|
2018-01-24 02:12:38 +08:00
|
|
|
ExitBB, CS == nullptr ? 1 : CS->size());
|
2016-01-22 16:56:50 +08:00
|
|
|
if (CS) {
|
2015-04-10 12:50:10 +08:00
|
|
|
unsigned CaseNumber = 0;
|
2018-04-14 01:31:06 +08:00
|
|
|
for (const Stmt *SubStmt : CS->children()) {
|
2015-04-10 12:50:10 +08:00
|
|
|
auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
|
|
|
|
CGF.EmitBlock(CaseBB);
|
|
|
|
SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
|
2015-07-03 05:03:14 +08:00
|
|
|
CGF.EmitStmt(SubStmt);
|
2015-04-10 12:50:10 +08:00
|
|
|
CGF.EmitBranch(ExitBB);
|
2015-07-03 05:03:14 +08:00
|
|
|
++CaseNumber;
|
2015-04-10 12:50:10 +08:00
|
|
|
}
|
2016-01-22 16:56:50 +08:00
|
|
|
} else {
|
2018-04-14 01:31:06 +08:00
|
|
|
llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case");
|
2016-01-22 16:56:50 +08:00
|
|
|
CGF.EmitBlock(CaseBB);
|
|
|
|
SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
|
2018-04-14 01:31:06 +08:00
|
|
|
CGF.EmitStmt(CapturedStmt);
|
2016-01-22 16:56:50 +08:00
|
|
|
CGF.EmitBranch(ExitBB);
|
2015-04-24 11:37:03 +08:00
|
|
|
}
|
2016-01-22 16:56:50 +08:00
|
|
|
CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
|
[OPENMP] Initial codegen for 'omp sections' and 'omp section' directives.
If only one section is found in the sections region, it is emitted just like single region.
Otherwise it is emitted as a static non-chunked loop.
#pragma omp sections
{
#pragma omp section
{1}
...
#pragma omp section
{n}
}
is translated to something like
i32 <iter_var>
i32 <last_iter> = 0
i32 <lower_bound> = 0
i32 <upper_bound> = n-1
i32 <stride> = 1
call void @__kmpc_for_static_init_4(<loc>, i32 <gtid>, i32 34/*static non-chunked*/, i32* <last_iter>, i32* <lower_bound>, i32* <upper_bound>, i32* <stride>, i32 1/*increment always 1*/, i32 1/*chunk always 1*/)
<upper_bound> = min(<upper_bound>, n-1)
<iter_var> = <lb>
check:
br <iter_var> <= <upper_bound>, label cont, label exit
continue:
switch (IV) {
case 0:
{1};
break;
...
case <NumSection> - 1:
{n};
break;
}
++<iter_var>
br label check
exit:
call void @__kmpc_for_static_fini(<loc>, i32 <gtid>)
Differential Revision: http://reviews.llvm.org/D8244
llvm-svn: 232021
2015-03-12 16:53:29 +08:00
|
|
|
};
|
2015-04-10 12:50:10 +08:00
|
|
|
|
2016-01-22 16:56:50 +08:00
|
|
|
CodeGenFunction::OMPPrivateScope LoopScope(CGF);
|
|
|
|
if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
|
2015-04-27 12:34:03 +08:00
|
|
|
// Emit implicit barrier to synchronize threads and avoid data races on
|
2016-02-15 16:07:17 +08:00
|
|
|
// initialization of firstprivate variables and post-update of lastprivate
|
|
|
|
// variables.
|
2016-01-22 16:56:50 +08:00
|
|
|
CGF.CGM.getOpenMPRuntime().emitBarrierCall(
|
2018-08-10 05:08:08 +08:00
|
|
|
CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
|
2016-01-22 16:56:50 +08:00
|
|
|
/*ForceSimpleCall=*/true);
|
2015-04-27 12:34:03 +08:00
|
|
|
}
|
2016-01-22 16:56:50 +08:00
|
|
|
CGF.EmitOMPPrivateClause(S, LoopScope);
|
|
|
|
HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
|
|
|
|
CGF.EmitOMPReductionClauseInit(S, LoopScope);
|
|
|
|
(void)LoopScope.Privatize();
|
2018-10-30 23:50:12 +08:00
|
|
|
if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
|
|
|
|
CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
|
2016-01-22 16:56:50 +08:00
|
|
|
|
|
|
|
// Emit static non-chunked loop.
|
2016-05-10 17:57:36 +08:00
|
|
|
OpenMPScheduleTy ScheduleKind;
|
|
|
|
ScheduleKind.Schedule = OMPC_SCHEDULE_static;
|
2017-08-15 01:56:13 +08:00
|
|
|
CGOpenMPRuntime::StaticRTInput StaticInit(
|
|
|
|
/*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
|
|
|
|
LB.getAddress(), UB.getAddress(), ST.getAddress());
|
2016-01-22 16:56:50 +08:00
|
|
|
CGF.CGM.getOpenMPRuntime().emitForStaticInit(
|
2018-08-10 05:08:08 +08:00
|
|
|
CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit);
|
2016-01-22 16:56:50 +08:00
|
|
|
// UB = min(UB, GlobalUB);
|
2018-08-10 05:08:08 +08:00
|
|
|
llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc());
|
2018-04-14 01:31:06 +08:00
|
|
|
llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
|
2016-01-22 16:56:50 +08:00
|
|
|
CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
|
|
|
|
CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
|
|
|
|
// IV = LB;
|
2018-08-10 05:08:08 +08:00
|
|
|
CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV);
|
2016-01-22 16:56:50 +08:00
|
|
|
// while (idx <= UB) { BODY; ++idx; }
|
|
|
|
CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
|
|
|
|
[](CodeGenFunction &) {});
|
|
|
|
// Tell the runtime we are done.
|
2016-11-17 23:12:05 +08:00
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
|
2018-08-10 05:09:38 +08:00
|
|
|
CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
|
2017-09-07 00:17:35 +08:00
|
|
|
S.getDirectiveKind());
|
2016-11-17 23:12:05 +08:00
|
|
|
};
|
|
|
|
CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
|
2017-02-17 00:20:16 +08:00
|
|
|
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
|
2016-03-02 12:57:40 +08:00
|
|
|
// Emit post-update of the reduction variables if IsLastIter != 0.
|
2018-04-14 01:31:06 +08:00
|
|
|
emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) {
|
|
|
|
return CGF.Builder.CreateIsNotNull(
|
2018-08-10 05:08:08 +08:00
|
|
|
CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
|
2018-04-14 01:31:06 +08:00
|
|
|
});
|
2015-04-24 11:37:03 +08:00
|
|
|
|
2016-01-22 16:56:50 +08:00
|
|
|
// Emit final copy of the lastprivate variables if IsLastIter != 0.
|
|
|
|
if (HasLastprivates)
|
|
|
|
CGF.EmitOMPLastprivateClauseFinal(
|
2016-04-22 11:56:56 +08:00
|
|
|
S, /*NoFinals=*/false,
|
|
|
|
CGF.Builder.CreateIsNotNull(
|
2018-08-10 05:08:08 +08:00
|
|
|
CGF.EmitLoadOfScalar(IL, S.getBeginLoc())));
|
2015-04-14 11:29:22 +08:00
|
|
|
};
|
2016-01-22 16:56:50 +08:00
|
|
|
|
|
|
|
bool HasCancel = false;
|
|
|
|
if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
|
|
|
|
HasCancel = OSD->hasCancel();
|
|
|
|
else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
|
|
|
|
HasCancel = OPSD->hasCancel();
|
2016-11-17 23:12:05 +08:00
|
|
|
OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
|
2016-01-22 16:56:50 +08:00
|
|
|
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
|
|
|
|
HasCancel);
|
|
|
|
// Emit barrier for lastprivates only if 'sections' directive has 'nowait'
|
|
|
|
// clause. Otherwise the barrier will be generated by the codegen for the
|
|
|
|
// directive.
|
|
|
|
if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
|
2015-04-24 11:37:03 +08:00
|
|
|
// Emit implicit barrier to synchronize threads and avoid data races on
|
|
|
|
// initialization of firstprivate variables.
|
2018-08-10 05:08:08 +08:00
|
|
|
CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
|
2016-01-22 16:56:50 +08:00
|
|
|
OMPD_unknown);
|
2015-04-24 11:37:03 +08:00
|
|
|
}
|
2015-04-14 11:29:22 +08:00
|
|
|
}
|
[OPENMP] Initial codegen for 'omp sections' and 'omp section' directives.
If only one section is found in the sections region, it is emitted just like single region.
Otherwise it is emitted as a static non-chunked loop.
#pragma omp sections
{
#pragma omp section
{1}
...
#pragma omp section
{n}
}
is translated to something like
i32 <iter_var>
i32 <last_iter> = 0
i32 <lower_bound> = 0
i32 <upper_bound> = n-1
i32 <stride> = 1
call void @__kmpc_for_static_init_4(<loc>, i32 <gtid>, i32 34/*static non-chunked*/, i32* <last_iter>, i32* <lower_bound>, i32* <upper_bound>, i32* <stride>, i32 1/*increment always 1*/, i32 1/*chunk always 1*/)
<upper_bound> = min(<upper_bound>, n-1)
<iter_var> = <lb>
check:
br <iter_var> <= <upper_bound>, label cont, label exit
continue:
switch (IV) {
case 0:
{1};
break;
...
case <NumSection> - 1:
{n};
break;
}
++<iter_var>
br label check
exit:
call void @__kmpc_for_static_fini(<loc>, i32 <gtid>)
Differential Revision: http://reviews.llvm.org/D8244
llvm-svn: 232021
2015-03-12 16:53:29 +08:00
|
|
|
|
2015-04-14 11:29:22 +08:00
|
|
|
void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
|
2016-02-16 19:18:12 +08:00
|
|
|
{
|
2018-01-13 03:39:11 +08:00
|
|
|
OMPLexicalScope Scope(*this, S, OMPD_unknown);
|
2016-02-16 19:18:12 +08:00
|
|
|
EmitSections(S);
|
|
|
|
}
|
[OPENMP] Initial codegen for 'omp sections' and 'omp section' directives.
If only one section is found in the sections region, it is emitted just like single region.
Otherwise it is emitted as a static non-chunked loop.
#pragma omp sections
{
#pragma omp section
{1}
...
#pragma omp section
{n}
}
is translated to something like
i32 <iter_var>
i32 <last_iter> = 0
i32 <lower_bound> = 0
i32 <upper_bound> = n-1
i32 <stride> = 1
call void @__kmpc_for_static_init_4(<loc>, i32 <gtid>, i32 34/*static non-chunked*/, i32* <last_iter>, i32* <lower_bound>, i32* <upper_bound>, i32* <stride>, i32 1/*increment always 1*/, i32 1/*chunk always 1*/)
<upper_bound> = min(<upper_bound>, n-1)
<iter_var> = <lb>
check:
br <iter_var> <= <upper_bound>, label cont, label exit
continue:
switch (IV) {
case 0:
{1};
break;
...
case <NumSection> - 1:
{n};
break;
}
++<iter_var>
br label check
exit:
call void @__kmpc_for_static_fini(<loc>, i32 <gtid>)
Differential Revision: http://reviews.llvm.org/D8244
llvm-svn: 232021
2015-03-12 16:53:29 +08:00
|
|
|
// Emit an implicit barrier at the end.
|
2015-08-30 23:12:28 +08:00
|
|
|
if (!S.getSingleClause<OMPNowaitClause>()) {
|
2018-08-10 05:08:08 +08:00
|
|
|
CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
|
2016-02-16 19:18:12 +08:00
|
|
|
OMPD_sections);
|
2015-03-30 12:30:22 +08:00
|
|
|
}
|
2014-06-25 19:44:49 +08:00
|
|
|
}
|
|
|
|
|
[OPENMP] Initial codegen for 'omp sections' and 'omp section' directives.
If only one section is found in the sections region, it is emitted just like single region.
Otherwise it is emitted as a static non-chunked loop.
#pragma omp sections
{
#pragma omp section
{1}
...
#pragma omp section
{n}
}
is translated to something like
i32 <iter_var>
i32 <last_iter> = 0
i32 <lower_bound> = 0
i32 <upper_bound> = n-1
i32 <stride> = 1
call void @__kmpc_for_static_init_4(<loc>, i32 <gtid>, i32 34/*static non-chunked*/, i32* <last_iter>, i32* <lower_bound>, i32* <upper_bound>, i32* <stride>, i32 1/*increment always 1*/, i32 1/*chunk always 1*/)
<upper_bound> = min(<upper_bound>, n-1)
<iter_var> = <lb>
check:
br <iter_var> <= <upper_bound>, label cont, label exit
continue:
switch (IV) {
case 0:
{1};
break;
...
case <NumSection> - 1:
{n};
break;
}
++<iter_var>
br label check
exit:
call void @__kmpc_for_static_fini(<loc>, i32 <gtid>)
Differential Revision: http://reviews.llvm.org/D8244
llvm-svn: 232021
2015-03-12 16:53:29 +08:00
|
|
|
void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
|
2016-03-29 13:34:15 +08:00
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
|
2018-01-13 03:39:11 +08:00
|
|
|
CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
|
2015-04-10 12:50:10 +08:00
|
|
|
};
|
2018-01-13 03:39:11 +08:00
|
|
|
OMPLexicalScope Scope(*this, S, OMPD_unknown);
|
2015-09-15 20:52:43 +08:00
|
|
|
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
|
|
|
|
S.hasCancel());
|
2014-06-26 16:21:58 +08:00
|
|
|
}
|
|
|
|
|
2015-02-05 14:35:41 +08:00
|
|
|
void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
|
2015-03-23 14:18:07 +08:00
|
|
|
llvm::SmallVector<const Expr *, 8> CopyprivateVars;
|
2015-04-14 13:11:24 +08:00
|
|
|
llvm::SmallVector<const Expr *, 8> DestExprs;
|
2015-03-23 14:18:07 +08:00
|
|
|
llvm::SmallVector<const Expr *, 8> SrcExprs;
|
|
|
|
llvm::SmallVector<const Expr *, 8> AssignmentOps;
|
2015-04-10 12:50:10 +08:00
|
|
|
// Check if there are any 'copyprivate' clauses associated with this
|
2016-02-15 16:07:17 +08:00
|
|
|
// 'single' construct.
|
2015-03-23 14:18:07 +08:00
|
|
|
// Build a list of copyprivate variables along with helper expressions
|
|
|
|
// (<source>, <destination>, <destination>=<source> expressions)
|
2015-08-30 23:12:28 +08:00
|
|
|
for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
|
2015-03-23 14:18:07 +08:00
|
|
|
CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
|
2015-04-14 13:11:24 +08:00
|
|
|
DestExprs.append(C->destination_exprs().begin(),
|
|
|
|
C->destination_exprs().end());
|
2015-03-23 14:18:07 +08:00
|
|
|
SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
|
|
|
|
AssignmentOps.append(C->assignment_ops().begin(),
|
|
|
|
C->assignment_ops().end());
|
|
|
|
}
|
2016-03-29 13:34:15 +08:00
|
|
|
// Emit code for 'single' region along with 'copyprivate' clauses
|
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
|
|
|
|
Action.Enter(CGF);
|
|
|
|
OMPPrivateScope SingleScope(CGF);
|
|
|
|
(void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
|
|
|
|
CGF.EmitOMPPrivateClause(S, SingleScope);
|
|
|
|
(void)SingleScope.Privatize();
|
2018-01-13 03:39:11 +08:00
|
|
|
CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
|
2016-03-29 13:34:15 +08:00
|
|
|
};
|
2016-02-16 19:18:12 +08:00
|
|
|
{
|
2018-01-13 03:39:11 +08:00
|
|
|
OMPLexicalScope Scope(*this, S, OMPD_unknown);
|
2018-08-10 05:08:08 +08:00
|
|
|
CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(),
|
2016-02-16 19:18:12 +08:00
|
|
|
CopyprivateVars, DestExprs,
|
|
|
|
SrcExprs, AssignmentOps);
|
|
|
|
}
|
|
|
|
// Emit an implicit barrier at the end (to avoid data race on firstprivate
|
|
|
|
// init or if no 'nowait' clause was specified and no 'copyprivate' clause).
|
2016-02-17 21:19:37 +08:00
|
|
|
if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
|
2015-04-24 12:21:15 +08:00
|
|
|
CGM.getOpenMPRuntime().emitBarrierCall(
|
2018-08-10 05:08:08 +08:00
|
|
|
*this, S.getBeginLoc(),
|
2015-08-30 23:12:28 +08:00
|
|
|
S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
|
2015-03-30 12:30:22 +08:00
|
|
|
}
|
2014-06-26 20:05:45 +08:00
|
|
|
}
|
|
|
|
|
2014-12-04 15:23:53 +08:00
|
|
|
void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
|
2016-03-29 13:34:15 +08:00
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
|
|
|
|
Action.Enter(CGF);
|
2018-01-13 03:39:11 +08:00
|
|
|
CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
|
2015-04-10 12:50:10 +08:00
|
|
|
};
|
2018-01-13 03:39:11 +08:00
|
|
|
OMPLexicalScope Scope(*this, S, OMPD_unknown);
|
2018-08-10 05:08:08 +08:00
|
|
|
CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
|
2014-07-17 16:54:58 +08:00
|
|
|
}
|
|
|
|
|
2014-09-22 18:01:53 +08:00
|
|
|
void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
|
2016-03-29 13:34:15 +08:00
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
|
|
|
|
Action.Enter(CGF);
|
2018-01-13 03:39:11 +08:00
|
|
|
CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
|
2015-04-10 12:50:10 +08:00
|
|
|
};
|
2018-04-14 01:31:06 +08:00
|
|
|
const Expr *Hint = nullptr;
|
|
|
|
if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
|
2015-12-15 18:55:09 +08:00
|
|
|
Hint = HintClause->getHint();
|
2018-01-13 03:39:11 +08:00
|
|
|
OMPLexicalScope Scope(*this, S, OMPD_unknown);
|
2015-12-15 18:55:09 +08:00
|
|
|
CGM.getOpenMPRuntime().emitCriticalRegion(*this,
|
|
|
|
S.getDirectiveName().getAsString(),
|
2018-08-10 05:08:08 +08:00
|
|
|
CodeGen, S.getBeginLoc(), Hint);
|
2014-07-21 17:42:05 +08:00
|
|
|
}
|
|
|
|
|
2015-04-13 13:28:11 +08:00
|
|
|
void CodeGenFunction::EmitOMPParallelForDirective(
|
|
|
|
const OMPParallelForDirective &S) {
|
|
|
|
// Emit directive as a combined directive that consists of two implicit
|
|
|
|
// directives: 'parallel' with 'for' directive.
|
2018-03-16 02:10:54 +08:00
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
|
|
|
|
Action.Enter(CGF);
|
2016-11-17 23:12:05 +08:00
|
|
|
OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
|
2017-04-26 01:52:12 +08:00
|
|
|
CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
|
|
|
|
emitDispatchForLoopBounds);
|
2015-04-13 13:28:11 +08:00
|
|
|
};
|
2017-04-26 01:52:12 +08:00
|
|
|
emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
|
|
|
|
emitEmptyBoundParameters);
|
2014-07-07 21:01:15 +08:00
|
|
|
}
|
|
|
|
|
2014-09-23 17:33:00 +08:00
|
|
|
void CodeGenFunction::EmitOMPParallelForSimdDirective(
|
2015-06-18 18:10:12 +08:00
|
|
|
const OMPParallelForSimdDirective &S) {
|
|
|
|
// Emit directive as a combined directive that consists of two implicit
|
|
|
|
// directives: 'parallel' with 'for' directive.
|
2018-03-16 02:10:54 +08:00
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
|
|
|
|
Action.Enter(CGF);
|
2017-04-26 01:52:12 +08:00
|
|
|
CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
|
|
|
|
emitDispatchForLoopBounds);
|
2015-06-18 18:10:12 +08:00
|
|
|
};
|
2017-04-26 01:52:12 +08:00
|
|
|
emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen,
|
|
|
|
emitEmptyBoundParameters);
|
2014-09-23 17:33:00 +08:00
|
|
|
}
|
|
|
|
|
2014-07-08 16:12:03 +08:00
|
|
|
void CodeGenFunction::EmitOMPParallelSectionsDirective(
|
2015-04-14 11:29:22 +08:00
|
|
|
const OMPParallelSectionsDirective &S) {
|
|
|
|
// Emit directive as a combined directive that consists of two implicit
|
|
|
|
// directives: 'parallel' with 'sections' directive.
|
2018-03-16 02:10:54 +08:00
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
|
|
|
|
Action.Enter(CGF);
|
2016-03-29 13:34:15 +08:00
|
|
|
CGF.EmitSections(S);
|
|
|
|
};
|
2017-04-26 01:52:12 +08:00
|
|
|
emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
|
|
|
|
emitEmptyBoundParameters);
|
2014-07-08 16:12:03 +08:00
|
|
|
}
|
|
|
|
|
2018-01-13 03:39:11 +08:00
|
|
|
void CodeGenFunction::EmitOMPTaskBasedDirective(
|
|
|
|
const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
|
|
|
|
const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
|
|
|
|
OMPTaskDataTy &Data) {
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
// Emit outlined function for task construct.
|
2018-01-13 03:39:11 +08:00
|
|
|
const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
|
2018-04-14 01:31:06 +08:00
|
|
|
auto I = CS->getCapturedDecl()->param_begin();
|
|
|
|
auto PartId = std::next(I);
|
|
|
|
auto TaskT = std::next(I, 4);
|
2016-04-28 17:23:51 +08:00
|
|
|
// Check if the task is final
|
|
|
|
if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
|
|
|
|
// If the condition constant folds and can be elided, try to avoid emitting
|
|
|
|
// the condition and the dead arm of the if/else.
|
2018-04-14 01:31:06 +08:00
|
|
|
const Expr *Cond = Clause->getCondition();
|
2016-04-28 17:23:51 +08:00
|
|
|
bool CondConstant;
|
|
|
|
if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
|
|
|
|
Data.Final.setInt(CondConstant);
|
|
|
|
else
|
|
|
|
Data.Final.setPointer(EvaluateExprAsBool(Cond));
|
|
|
|
} else {
|
|
|
|
// By default the task is not final.
|
|
|
|
Data.Final.setInt(/*IntVal=*/false);
|
|
|
|
}
|
2016-05-10 20:21:02 +08:00
|
|
|
// Check if the task has 'priority' clause.
|
|
|
|
if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
|
2018-04-14 01:31:06 +08:00
|
|
|
const Expr *Prio = Clause->getPriority();
|
2016-07-19 12:21:09 +08:00
|
|
|
Data.Priority.setInt(/*IntVal=*/true);
|
2016-05-30 17:06:50 +08:00
|
|
|
Data.Priority.setPointer(EmitScalarConversion(
|
|
|
|
EmitScalarExpr(Prio), Prio->getType(),
|
|
|
|
getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
|
|
|
|
Prio->getExprLoc()));
|
2016-05-10 20:21:02 +08:00
|
|
|
}
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
// The first function argument for tasks is a thread id, the second one is a
|
|
|
|
// part id (0 for tied tasks, >=0 for untied task).
|
[OPENMP] Codegen for 'firstprivate' clause in 'task' directive.
For tasks codegen for private/firstprivate variables are different rather than for other directives.
1. Build an internal structure of privates for each private variable:
struct .kmp_privates_t. {
Ty1 var1;
...
Tyn varn;
};
2. Add a new field to kmp_task_t type with list of privates.
struct kmp_task_t {
void * shareds;
kmp_routine_entry_t routine;
kmp_int32 part_id;
kmp_routine_entry_t destructors;
.kmp_privates_t. privates;
};
3. Create a function with destructors calls for all privates after end of task region.
kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
~Destructor(&tt->privates.var1);
...
~Destructor(&tt->privates.varn);
return 0;
}
4. Perform initialization of all firstprivate fields (by simple copying for POD data, copy constructor calls for classes) + provide address of a destructor function after kmpc_omp_task_alloc() and before kmpc_omp_task() calls.
kmp_task_t *new_task = __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry);
CopyConstructor(new_task->privates.var1, *new_task->shareds.var1_ref);
new_task->shareds.var1_ref = &new_task->privates.var1;
...
CopyConstructor(new_task->privates.varn, *new_task->shareds.varn_ref);
new_task->shareds.varn_ref = &new_task->privates.varn;
new_task->destructors = .omp_task_destructor.;
kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task)
Differential Revision: http://reviews.llvm.org/D9370
llvm-svn: 236479
2015-05-05 12:05:12 +08:00
|
|
|
llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
|
2015-04-30 14:51:57 +08:00
|
|
|
// Get list of private variables.
|
2015-08-30 23:12:28 +08:00
|
|
|
for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
|
2015-04-30 14:51:57 +08:00
|
|
|
auto IRef = C->varlist_begin();
|
2018-04-14 01:31:06 +08:00
|
|
|
for (const Expr *IInit : C->private_copies()) {
|
|
|
|
const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
|
2015-04-30 14:51:57 +08:00
|
|
|
if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
Data.PrivateVars.push_back(*IRef);
|
|
|
|
Data.PrivateCopies.push_back(IInit);
|
2015-04-30 14:51:57 +08:00
|
|
|
}
|
|
|
|
++IRef;
|
|
|
|
}
|
|
|
|
}
|
[OPENMP] Codegen for 'firstprivate' clause in 'task' directive.
For tasks codegen for private/firstprivate variables are different rather than for other directives.
1. Build an internal structure of privates for each private variable:
struct .kmp_privates_t. {
Ty1 var1;
...
Tyn varn;
};
2. Add a new field to kmp_task_t type with list of privates.
struct kmp_task_t {
void * shareds;
kmp_routine_entry_t routine;
kmp_int32 part_id;
kmp_routine_entry_t destructors;
.kmp_privates_t. privates;
};
3. Create a function with destructors calls for all privates after end of task region.
kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
~Destructor(&tt->privates.var1);
...
~Destructor(&tt->privates.varn);
return 0;
}
4. Perform initialization of all firstprivate fields (by simple copying for POD data, copy constructor calls for classes) + provide address of a destructor function after kmpc_omp_task_alloc() and before kmpc_omp_task() calls.
kmp_task_t *new_task = __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry);
CopyConstructor(new_task->privates.var1, *new_task->shareds.var1_ref);
new_task->shareds.var1_ref = &new_task->privates.var1;
...
CopyConstructor(new_task->privates.varn, *new_task->shareds.varn_ref);
new_task->shareds.varn_ref = &new_task->privates.varn;
new_task->destructors = .omp_task_destructor.;
kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task)
Differential Revision: http://reviews.llvm.org/D9370
llvm-svn: 236479
2015-05-05 12:05:12 +08:00
|
|
|
EmittedAsPrivate.clear();
|
|
|
|
// Get list of firstprivate variables.
|
2015-08-30 23:12:28 +08:00
|
|
|
for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
|
[OPENMP] Codegen for 'firstprivate' clause in 'task' directive.
For tasks codegen for private/firstprivate variables are different rather than for other directives.
1. Build an internal structure of privates for each private variable:
struct .kmp_privates_t. {
Ty1 var1;
...
Tyn varn;
};
2. Add a new field to kmp_task_t type with list of privates.
struct kmp_task_t {
void * shareds;
kmp_routine_entry_t routine;
kmp_int32 part_id;
kmp_routine_entry_t destructors;
.kmp_privates_t. privates;
};
3. Create a function with destructors calls for all privates after end of task region.
kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
~Destructor(&tt->privates.var1);
...
~Destructor(&tt->privates.varn);
return 0;
}
4. Perform initialization of all firstprivate fields (by simple copying for POD data, copy constructor calls for classes) + provide address of a destructor function after kmpc_omp_task_alloc() and before kmpc_omp_task() calls.
kmp_task_t *new_task = __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry);
CopyConstructor(new_task->privates.var1, *new_task->shareds.var1_ref);
new_task->shareds.var1_ref = &new_task->privates.var1;
...
CopyConstructor(new_task->privates.varn, *new_task->shareds.varn_ref);
new_task->shareds.varn_ref = &new_task->privates.varn;
new_task->destructors = .omp_task_destructor.;
kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task)
Differential Revision: http://reviews.llvm.org/D9370
llvm-svn: 236479
2015-05-05 12:05:12 +08:00
|
|
|
auto IRef = C->varlist_begin();
|
|
|
|
auto IElemInitRef = C->inits().begin();
|
2018-04-14 01:31:06 +08:00
|
|
|
for (const Expr *IInit : C->private_copies()) {
|
|
|
|
const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
|
[OPENMP] Codegen for 'firstprivate' clause in 'task' directive.
For tasks codegen for private/firstprivate variables are different rather than for other directives.
1. Build an internal structure of privates for each private variable:
struct .kmp_privates_t. {
Ty1 var1;
...
Tyn varn;
};
2. Add a new field to kmp_task_t type with list of privates.
struct kmp_task_t {
void * shareds;
kmp_routine_entry_t routine;
kmp_int32 part_id;
kmp_routine_entry_t destructors;
.kmp_privates_t. privates;
};
3. Create a function with destructors calls for all privates after end of task region.
kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
~Destructor(&tt->privates.var1);
...
~Destructor(&tt->privates.varn);
return 0;
}
4. Perform initialization of all firstprivate fields (by simple copying for POD data, copy constructor calls for classes) + provide address of a destructor function after kmpc_omp_task_alloc() and before kmpc_omp_task() calls.
kmp_task_t *new_task = __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry);
CopyConstructor(new_task->privates.var1, *new_task->shareds.var1_ref);
new_task->shareds.var1_ref = &new_task->privates.var1;
...
CopyConstructor(new_task->privates.varn, *new_task->shareds.varn_ref);
new_task->shareds.varn_ref = &new_task->privates.varn;
new_task->destructors = .omp_task_destructor.;
kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task)
Differential Revision: http://reviews.llvm.org/D9370
llvm-svn: 236479
2015-05-05 12:05:12 +08:00
|
|
|
if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
Data.FirstprivateVars.push_back(*IRef);
|
|
|
|
Data.FirstprivateCopies.push_back(IInit);
|
|
|
|
Data.FirstprivateInits.push_back(*IElemInitRef);
|
[OPENMP] Codegen for 'firstprivate' clause in 'task' directive.
For tasks codegen for private/firstprivate variables are different rather than for other directives.
1. Build an internal structure of privates for each private variable:
struct .kmp_privates_t. {
Ty1 var1;
...
Tyn varn;
};
2. Add a new field to kmp_task_t type with list of privates.
struct kmp_task_t {
void * shareds;
kmp_routine_entry_t routine;
kmp_int32 part_id;
kmp_routine_entry_t destructors;
.kmp_privates_t. privates;
};
3. Create a function with destructors calls for all privates after end of task region.
kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
~Destructor(&tt->privates.var1);
...
~Destructor(&tt->privates.varn);
return 0;
}
4. Perform initialization of all firstprivate fields (by simple copying for POD data, copy constructor calls for classes) + provide address of a destructor function after kmpc_omp_task_alloc() and before kmpc_omp_task() calls.
kmp_task_t *new_task = __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry);
CopyConstructor(new_task->privates.var1, *new_task->shareds.var1_ref);
new_task->shareds.var1_ref = &new_task->privates.var1;
...
CopyConstructor(new_task->privates.varn, *new_task->shareds.varn_ref);
new_task->shareds.varn_ref = &new_task->privates.varn;
new_task->destructors = .omp_task_destructor.;
kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task)
Differential Revision: http://reviews.llvm.org/D9370
llvm-svn: 236479
2015-05-05 12:05:12 +08:00
|
|
|
}
|
2016-02-19 06:34:54 +08:00
|
|
|
++IRef;
|
|
|
|
++IElemInitRef;
|
[OPENMP] Codegen for 'firstprivate' clause in 'task' directive.
For tasks codegen for private/firstprivate variables are different rather than for other directives.
1. Build an internal structure of privates for each private variable:
struct .kmp_privates_t. {
Ty1 var1;
...
Tyn varn;
};
2. Add a new field to kmp_task_t type with list of privates.
struct kmp_task_t {
void * shareds;
kmp_routine_entry_t routine;
kmp_int32 part_id;
kmp_routine_entry_t destructors;
.kmp_privates_t. privates;
};
3. Create a function with destructors calls for all privates after end of task region.
kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
~Destructor(&tt->privates.var1);
...
~Destructor(&tt->privates.varn);
return 0;
}
4. Perform initialization of all firstprivate fields (by simple copying for POD data, copy constructor calls for classes) + provide address of a destructor function after kmpc_omp_task_alloc() and before kmpc_omp_task() calls.
kmp_task_t *new_task = __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry);
CopyConstructor(new_task->privates.var1, *new_task->shareds.var1_ref);
new_task->shareds.var1_ref = &new_task->privates.var1;
...
CopyConstructor(new_task->privates.varn, *new_task->shareds.varn_ref);
new_task->shareds.varn_ref = &new_task->privates.varn;
new_task->destructors = .omp_task_destructor.;
kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task)
Differential Revision: http://reviews.llvm.org/D9370
llvm-svn: 236479
2015-05-05 12:05:12 +08:00
|
|
|
}
|
|
|
|
}
|
2016-05-05 16:46:22 +08:00
|
|
|
// Get list of lastprivate variables (for taskloops).
|
|
|
|
llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
|
|
|
|
for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
|
|
|
|
auto IRef = C->varlist_begin();
|
|
|
|
auto ID = C->destination_exprs().begin();
|
2018-04-14 01:31:06 +08:00
|
|
|
for (const Expr *IInit : C->private_copies()) {
|
|
|
|
const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
|
2016-05-05 16:46:22 +08:00
|
|
|
if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
|
|
|
|
Data.LastprivateVars.push_back(*IRef);
|
|
|
|
Data.LastprivateCopies.push_back(IInit);
|
|
|
|
}
|
|
|
|
LastprivateDstsOrigs.insert(
|
|
|
|
{cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
|
|
|
|
cast<DeclRefExpr>(*IRef)});
|
|
|
|
++IRef;
|
|
|
|
++ID;
|
|
|
|
}
|
|
|
|
}
|
2017-07-17 21:30:36 +08:00
|
|
|
SmallVector<const Expr *, 4> LHSs;
|
|
|
|
SmallVector<const Expr *, 4> RHSs;
|
|
|
|
for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
|
|
|
|
auto IPriv = C->privates().begin();
|
|
|
|
auto IRed = C->reduction_ops().begin();
|
|
|
|
auto ILHS = C->lhs_exprs().begin();
|
|
|
|
auto IRHS = C->rhs_exprs().begin();
|
2018-04-14 01:31:06 +08:00
|
|
|
for (const Expr *Ref : C->varlists()) {
|
2017-07-17 21:30:36 +08:00
|
|
|
Data.ReductionVars.emplace_back(Ref);
|
|
|
|
Data.ReductionCopies.emplace_back(*IPriv);
|
|
|
|
Data.ReductionOps.emplace_back(*IRed);
|
|
|
|
LHSs.emplace_back(*ILHS);
|
|
|
|
RHSs.emplace_back(*IRHS);
|
|
|
|
std::advance(IPriv, 1);
|
|
|
|
std::advance(IRed, 1);
|
|
|
|
std::advance(ILHS, 1);
|
|
|
|
std::advance(IRHS, 1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
|
2018-08-10 05:08:08 +08:00
|
|
|
*this, S.getBeginLoc(), LHSs, RHSs, Data);
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If task directive has associated 'depend' clause then function kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If this directive has associated 'if' clause then also before a call of kmpc_omp_task_begin_if0() a function void __kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called.
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
// Build list of dependences.
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
for (const auto *C : S.getClausesOfKind<OMPDependClause>())
|
2018-04-14 01:31:06 +08:00
|
|
|
for (const Expr *IRef : C->varlists())
|
2018-04-14 01:48:43 +08:00
|
|
|
Data.Dependences.emplace_back(C->getDependencyKind(), IRef);
|
2018-01-13 03:39:11 +08:00
|
|
|
auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
|
|
|
|
CapturedRegion](CodeGenFunction &CGF,
|
|
|
|
PrePostActionTy &Action) {
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
// Set proper addresses for generated private copies.
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
OMPPrivateScope Scope(CGF);
|
2016-05-05 16:46:22 +08:00
|
|
|
if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
|
|
|
|
!Data.LastprivateVars.empty()) {
|
2019-02-06 00:42:33 +08:00
|
|
|
llvm::FunctionType *CopyFnTy = llvm::FunctionType::get(
|
|
|
|
CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true);
|
2017-08-14 23:01:03 +08:00
|
|
|
enum { PrivatesParam = 2, CopyFnParam = 3 };
|
2018-04-14 01:31:06 +08:00
|
|
|
llvm::Value *CopyFn = CGF.Builder.CreateLoad(
|
|
|
|
CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
|
|
|
|
llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
|
|
|
|
CS->getCapturedDecl()->getParam(PrivatesParam)));
|
2016-04-20 12:01:36 +08:00
|
|
|
// Map privates.
|
|
|
|
llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
|
|
|
|
llvm::SmallVector<llvm::Value *, 16> CallArgs;
|
|
|
|
CallArgs.push_back(PrivatesPtr);
|
2018-04-14 01:31:06 +08:00
|
|
|
for (const Expr *E : Data.PrivateVars) {
|
|
|
|
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
|
2016-04-20 12:01:36 +08:00
|
|
|
Address PrivatePtr = CGF.CreateMemTemp(
|
|
|
|
CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
|
2018-04-14 01:31:06 +08:00
|
|
|
PrivatePtrs.emplace_back(VD, PrivatePtr);
|
2016-04-20 12:01:36 +08:00
|
|
|
CallArgs.push_back(PrivatePtr.getPointer());
|
|
|
|
}
|
2018-04-14 01:31:06 +08:00
|
|
|
for (const Expr *E : Data.FirstprivateVars) {
|
|
|
|
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
|
2016-04-20 12:01:36 +08:00
|
|
|
Address PrivatePtr =
|
|
|
|
CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
|
|
|
|
".firstpriv.ptr.addr");
|
2018-04-14 01:31:06 +08:00
|
|
|
PrivatePtrs.emplace_back(VD, PrivatePtr);
|
2016-04-20 12:01:36 +08:00
|
|
|
CallArgs.push_back(PrivatePtr.getPointer());
|
2016-03-28 18:12:03 +08:00
|
|
|
}
|
2018-04-14 01:31:06 +08:00
|
|
|
for (const Expr *E : Data.LastprivateVars) {
|
|
|
|
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
|
2016-05-05 16:46:22 +08:00
|
|
|
Address PrivatePtr =
|
|
|
|
CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
|
|
|
|
".lastpriv.ptr.addr");
|
2018-04-14 01:31:06 +08:00
|
|
|
PrivatePtrs.emplace_back(VD, PrivatePtr);
|
2016-05-05 16:46:22 +08:00
|
|
|
CallArgs.push_back(PrivatePtr.getPointer());
|
|
|
|
}
|
2019-02-06 00:42:33 +08:00
|
|
|
CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
|
|
|
|
CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
|
2018-04-14 01:31:06 +08:00
|
|
|
for (const auto &Pair : LastprivateDstsOrigs) {
|
|
|
|
const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
|
2018-12-21 22:10:18 +08:00
|
|
|
DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
|
|
|
|
/*RefersToEnclosingVariableOrCapture=*/
|
|
|
|
CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
|
|
|
|
Pair.second->getType(), VK_LValue,
|
|
|
|
Pair.second->getExprLoc());
|
2016-05-05 16:46:22 +08:00
|
|
|
Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
|
|
|
|
return CGF.EmitLValue(&DRE).getAddress();
|
|
|
|
});
|
|
|
|
}
|
2018-04-14 01:31:06 +08:00
|
|
|
for (const auto &Pair : PrivatePtrs) {
|
2016-04-20 12:01:36 +08:00
|
|
|
Address Replacement(CGF.Builder.CreateLoad(Pair.second),
|
|
|
|
CGF.getContext().getDeclAlign(Pair.first));
|
|
|
|
Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
|
2016-03-28 20:58:34 +08:00
|
|
|
}
|
|
|
|
}
|
2017-07-17 21:30:36 +08:00
|
|
|
if (Data.Reductions) {
|
2018-01-13 03:39:11 +08:00
|
|
|
OMPLexicalScope LexScope(CGF, S, CapturedRegion);
|
2017-07-17 21:30:36 +08:00
|
|
|
ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
|
|
|
|
Data.ReductionOps);
|
|
|
|
llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
|
|
|
|
CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
|
|
|
|
for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
|
|
|
|
RedCG.emitSharedLValue(CGF, Cnt);
|
|
|
|
RedCG.emitAggregateType(CGF, Cnt);
|
2018-03-08 23:24:08 +08:00
|
|
|
// FIXME: This must be removed once the runtime library is fixed.
|
|
|
|
// Emit required threadprivate variables for
|
Misc typos fixes in ./lib folder
Summary: Found via `codespell -q 3 -I ../clang-whitelist.txt -L uint,importd,crasher,gonna,cant,ue,ons,orign,ned`
Reviewers: teemperor
Reviewed By: teemperor
Subscribers: teemperor, jholewinski, jvesely, nhaehnle, whisperity, jfb, cfe-commits
Differential Revision: https://reviews.llvm.org/D55475
llvm-svn: 348755
2018-12-10 20:37:46 +08:00
|
|
|
// initializer/combiner/finalizer.
|
2018-08-10 05:08:08 +08:00
|
|
|
CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
|
2018-03-08 23:24:08 +08:00
|
|
|
RedCG, Cnt);
|
2017-07-17 21:30:36 +08:00
|
|
|
Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
|
2018-08-10 05:08:08 +08:00
|
|
|
CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
|
2017-07-17 21:30:36 +08:00
|
|
|
Replacement =
|
|
|
|
Address(CGF.EmitScalarConversion(
|
|
|
|
Replacement.getPointer(), CGF.getContext().VoidPtrTy,
|
|
|
|
CGF.getContext().getPointerType(
|
|
|
|
Data.ReductionCopies[Cnt]->getType()),
|
2018-01-24 02:12:38 +08:00
|
|
|
Data.ReductionCopies[Cnt]->getExprLoc()),
|
2017-07-17 21:30:36 +08:00
|
|
|
Replacement.getAlignment());
|
|
|
|
Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
|
|
|
|
Scope.addPrivate(RedCG.getBaseDecl(Cnt),
|
|
|
|
[Replacement]() { return Replacement; });
|
|
|
|
}
|
|
|
|
}
|
2017-07-27 21:20:36 +08:00
|
|
|
// Privatize all private variables except for in_reduction items.
|
2016-04-20 12:01:36 +08:00
|
|
|
(void)Scope.Privatize();
|
2017-07-27 21:20:36 +08:00
|
|
|
SmallVector<const Expr *, 4> InRedVars;
|
|
|
|
SmallVector<const Expr *, 4> InRedPrivs;
|
|
|
|
SmallVector<const Expr *, 4> InRedOps;
|
|
|
|
SmallVector<const Expr *, 4> TaskgroupDescriptors;
|
|
|
|
for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
|
|
|
|
auto IPriv = C->privates().begin();
|
|
|
|
auto IRed = C->reduction_ops().begin();
|
|
|
|
auto ITD = C->taskgroup_descriptors().begin();
|
2018-04-14 01:31:06 +08:00
|
|
|
for (const Expr *Ref : C->varlists()) {
|
2017-07-27 21:20:36 +08:00
|
|
|
InRedVars.emplace_back(Ref);
|
|
|
|
InRedPrivs.emplace_back(*IPriv);
|
|
|
|
InRedOps.emplace_back(*IRed);
|
|
|
|
TaskgroupDescriptors.emplace_back(*ITD);
|
|
|
|
std::advance(IPriv, 1);
|
|
|
|
std::advance(IRed, 1);
|
|
|
|
std::advance(ITD, 1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Privatize in_reduction items here, because taskgroup descriptors must be
|
|
|
|
// privatized earlier.
|
|
|
|
OMPPrivateScope InRedScope(CGF);
|
|
|
|
if (!InRedVars.empty()) {
|
|
|
|
ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps);
|
|
|
|
for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
|
|
|
|
RedCG.emitSharedLValue(CGF, Cnt);
|
|
|
|
RedCG.emitAggregateType(CGF, Cnt);
|
|
|
|
// The taskgroup descriptor variable is always implicit firstprivate and
|
Misc typos fixes in ./lib folder
Summary: Found via `codespell -q 3 -I ../clang-whitelist.txt -L uint,importd,crasher,gonna,cant,ue,ons,orign,ned`
Reviewers: teemperor
Reviewed By: teemperor
Subscribers: teemperor, jholewinski, jvesely, nhaehnle, whisperity, jfb, cfe-commits
Differential Revision: https://reviews.llvm.org/D55475
llvm-svn: 348755
2018-12-10 20:37:46 +08:00
|
|
|
// privatized already during processing of the firstprivates.
|
2018-03-08 23:24:08 +08:00
|
|
|
// FIXME: This must be removed once the runtime library is fixed.
|
|
|
|
// Emit required threadprivate variables for
|
Misc typos fixes in ./lib folder
Summary: Found via `codespell -q 3 -I ../clang-whitelist.txt -L uint,importd,crasher,gonna,cant,ue,ons,orign,ned`
Reviewers: teemperor
Reviewed By: teemperor
Subscribers: teemperor, jholewinski, jvesely, nhaehnle, whisperity, jfb, cfe-commits
Differential Revision: https://reviews.llvm.org/D55475
llvm-svn: 348755
2018-12-10 20:37:46 +08:00
|
|
|
// initializer/combiner/finalizer.
|
2018-08-10 05:08:08 +08:00
|
|
|
CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
|
2018-03-08 23:24:08 +08:00
|
|
|
RedCG, Cnt);
|
2018-01-24 02:12:38 +08:00
|
|
|
llvm::Value *ReductionsPtr =
|
|
|
|
CGF.EmitLoadOfScalar(CGF.EmitLValue(TaskgroupDescriptors[Cnt]),
|
|
|
|
TaskgroupDescriptors[Cnt]->getExprLoc());
|
2017-07-27 21:20:36 +08:00
|
|
|
Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
|
2018-08-10 05:08:08 +08:00
|
|
|
CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
|
2017-07-27 21:20:36 +08:00
|
|
|
Replacement = Address(
|
|
|
|
CGF.EmitScalarConversion(
|
|
|
|
Replacement.getPointer(), CGF.getContext().VoidPtrTy,
|
|
|
|
CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
|
2018-01-24 02:12:38 +08:00
|
|
|
InRedPrivs[Cnt]->getExprLoc()),
|
2017-07-27 21:20:36 +08:00
|
|
|
Replacement.getAlignment());
|
|
|
|
Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
|
|
|
|
InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
|
|
|
|
[Replacement]() { return Replacement; });
|
|
|
|
}
|
|
|
|
}
|
|
|
|
(void)InRedScope.Privatize();
|
2016-04-20 12:01:36 +08:00
|
|
|
|
|
|
|
Action.Enter(CGF);
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
BodyGen(CGF);
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
};
|
2019-02-06 00:42:33 +08:00
|
|
|
llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
|
|
|
|
Data.NumberOfParts);
|
|
|
|
OMPLexicalScope Scope(*this, S);
|
|
|
|
TaskGen(*this, OutlinedFn, Data);
|
|
|
|
}
|
|
|
|
|
2017-12-28 01:58:32 +08:00
|
|
|
/// Synthesizes an implicit firstprivate item of type \p Ty and records it in
/// \p Data.
///
/// Three unnamed implicit parameter declarations are created in \p CD at
/// \p Loc, each paired with an lvalue reference expression:
///   - the "original" variable of type \p Ty,
///   - its private copy of type \p Ty,
///   - an initializer source whose type is the base element type of \p Ty.
/// The private copy receives a C-style initializer that is an
/// lvalue-to-rvalue cast of the initializer reference. The three references
/// are appended to Data.FirstprivateVars, Data.FirstprivateCopies and
/// Data.FirstprivateInits respectively.
///
/// \returns the declaration created for the "original" variable.
static ImplicitParamDecl *
createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
                                  QualType Ty, CapturedDecl *CD,
                                  SourceLocation Loc) {
  // Creates an unnamed implicit parameter of the requested type inside CD.
  auto &&MakeParam = [&C, CD, Loc](QualType ParamTy) {
    return ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ParamTy,
                                     ImplicitParamDecl::Other);
  };
  // Creates an lvalue reference to the given declaration (not a capture).
  auto &&MakeRef = [&C, Loc](ImplicitParamDecl *D, QualType RefTy) {
    return DeclRefExpr::Create(
        C, NestedNameSpecifierLoc(), SourceLocation(), D,
        /*RefersToEnclosingVariableOrCapture=*/false, Loc, RefTy, VK_LValue);
  };

  // The "original" variable and the reference to it.
  ImplicitParamDecl *SharedVD = MakeParam(Ty);
  DeclRefExpr *SharedRef = MakeRef(SharedVD, Ty);

  // The private copy and the reference to it.
  ImplicitParamDecl *CopyVD = MakeParam(Ty);
  DeclRefExpr *CopyRef = MakeRef(CopyVD, Ty);

  // The initializer source is typed with the base element type of Ty.
  QualType BaseElemTy = C.getBaseElementType(Ty);
  ImplicitParamDecl *SrcVD = MakeParam(BaseElemTy);
  DeclRefExpr *SrcRef = MakeRef(SrcVD, BaseElemTy);

  // Initialize the private copy from the loaded value of the init source.
  Expr *LoadedSrc =
      ImplicitCastExpr::Create(C, BaseElemTy, CK_LValueToRValue, SrcRef,
                               /*BasePath=*/nullptr, VK_RValue);
  CopyVD->setInitStyle(VarDecl::CInit);
  CopyVD->setInit(LoadedSrc);

  // Register the new firstprivate item with the task data.
  Data.FirstprivateVars.emplace_back(SharedRef);
  Data.FirstprivateCopies.emplace_back(CopyRef);
  Data.FirstprivateInits.emplace_back(SrcRef);
  return SharedVD;
}
|
|
|
|
|
|
|
|
void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
|
|
|
|
const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
|
|
|
|
OMPTargetDataInfo &InputInfo) {
|
|
|
|
// Emit outlined function for task construct.
|
2018-04-14 01:31:06 +08:00
|
|
|
const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
|
|
|
|
Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
|
|
|
|
QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
|
|
|
|
auto I = CS->getCapturedDecl()->param_begin();
|
|
|
|
auto PartId = std::next(I);
|
|
|
|
auto TaskT = std::next(I, 4);
|
2017-12-28 01:58:32 +08:00
|
|
|
OMPTaskDataTy Data;
|
|
|
|
// The task is not final.
|
|
|
|
Data.Final.setInt(/*IntVal=*/false);
|
|
|
|
// Get list of firstprivate variables.
|
|
|
|
for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
|
|
|
|
auto IRef = C->varlist_begin();
|
|
|
|
auto IElemInitRef = C->inits().begin();
|
|
|
|
for (auto *IInit : C->private_copies()) {
|
|
|
|
Data.FirstprivateVars.push_back(*IRef);
|
|
|
|
Data.FirstprivateCopies.push_back(IInit);
|
|
|
|
Data.FirstprivateInits.push_back(*IElemInitRef);
|
|
|
|
++IRef;
|
|
|
|
++IElemInitRef;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
OMPPrivateScope TargetScope(*this);
|
|
|
|
VarDecl *BPVD = nullptr;
|
|
|
|
VarDecl *PVD = nullptr;
|
|
|
|
VarDecl *SVD = nullptr;
|
|
|
|
if (InputInfo.NumberOfTargetItems > 0) {
|
|
|
|
auto *CD = CapturedDecl::Create(
|
|
|
|
getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
|
|
|
|
llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
|
|
|
|
QualType BaseAndPointersType = getContext().getConstantArrayType(
|
|
|
|
getContext().VoidPtrTy, ArrSize, ArrayType::Normal,
|
|
|
|
/*IndexTypeQuals=*/0);
|
2018-01-24 02:12:38 +08:00
|
|
|
BPVD = createImplicitFirstprivateForType(
|
2018-08-10 05:08:08 +08:00
|
|
|
getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc());
|
2018-01-24 02:12:38 +08:00
|
|
|
PVD = createImplicitFirstprivateForType(
|
2018-08-10 05:08:08 +08:00
|
|
|
getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc());
|
2017-12-28 01:58:32 +08:00
|
|
|
QualType SizesType = getContext().getConstantArrayType(
|
2019-06-26 00:00:43 +08:00
|
|
|
getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
|
|
|
|
ArrSize, ArrayType::Normal,
|
2017-12-28 01:58:32 +08:00
|
|
|
/*IndexTypeQuals=*/0);
|
2018-01-24 02:12:38 +08:00
|
|
|
SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
|
2018-08-10 05:08:08 +08:00
|
|
|
S.getBeginLoc());
|
2017-12-28 01:58:32 +08:00
|
|
|
TargetScope.addPrivate(
|
|
|
|
BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; });
|
|
|
|
TargetScope.addPrivate(PVD,
|
|
|
|
[&InputInfo]() { return InputInfo.PointersArray; });
|
|
|
|
TargetScope.addPrivate(SVD,
|
|
|
|
[&InputInfo]() { return InputInfo.SizesArray; });
|
|
|
|
}
|
|
|
|
(void)TargetScope.Privatize();
|
|
|
|
// Build list of dependences.
|
|
|
|
for (const auto *C : S.getClausesOfKind<OMPDependClause>())
|
2018-04-14 01:31:06 +08:00
|
|
|
for (const Expr *IRef : C->varlists())
|
2018-04-14 01:48:43 +08:00
|
|
|
Data.Dependences.emplace_back(C->getDependencyKind(), IRef);
|
2017-12-28 01:58:32 +08:00
|
|
|
auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD,
|
|
|
|
&InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
|
|
|
|
// Set proper addresses for generated private copies.
|
|
|
|
OMPPrivateScope Scope(CGF);
|
|
|
|
if (!Data.FirstprivateVars.empty()) {
|
2019-02-06 00:42:33 +08:00
|
|
|
llvm::FunctionType *CopyFnTy = llvm::FunctionType::get(
|
|
|
|
CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true);
|
2017-12-28 01:58:32 +08:00
|
|
|
enum { PrivatesParam = 2, CopyFnParam = 3 };
|
2018-04-14 01:31:06 +08:00
|
|
|
llvm::Value *CopyFn = CGF.Builder.CreateLoad(
|
|
|
|
CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
|
|
|
|
llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
|
|
|
|
CS->getCapturedDecl()->getParam(PrivatesParam)));
|
2017-12-28 01:58:32 +08:00
|
|
|
// Map privates.
|
|
|
|
llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
|
|
|
|
llvm::SmallVector<llvm::Value *, 16> CallArgs;
|
|
|
|
CallArgs.push_back(PrivatesPtr);
|
2018-04-14 01:31:06 +08:00
|
|
|
for (const Expr *E : Data.FirstprivateVars) {
|
|
|
|
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
|
2017-12-28 01:58:32 +08:00
|
|
|
Address PrivatePtr =
|
|
|
|
CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
|
|
|
|
".firstpriv.ptr.addr");
|
2018-04-14 01:31:06 +08:00
|
|
|
PrivatePtrs.emplace_back(VD, PrivatePtr);
|
2017-12-28 01:58:32 +08:00
|
|
|
CallArgs.push_back(PrivatePtr.getPointer());
|
|
|
|
}
|
2019-02-06 00:42:33 +08:00
|
|
|
CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
|
|
|
|
CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
|
2018-04-14 01:31:06 +08:00
|
|
|
for (const auto &Pair : PrivatePtrs) {
|
2017-12-28 01:58:32 +08:00
|
|
|
Address Replacement(CGF.Builder.CreateLoad(Pair.second),
|
|
|
|
CGF.getContext().getDeclAlign(Pair.first));
|
|
|
|
Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Privatize all private variables except for in_reduction items.
|
|
|
|
(void)Scope.Privatize();
|
2018-01-16 03:06:12 +08:00
|
|
|
if (InputInfo.NumberOfTargetItems > 0) {
|
|
|
|
InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
|
2019-02-10 06:22:28 +08:00
|
|
|
CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0);
|
2018-01-16 03:06:12 +08:00
|
|
|
InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
|
2019-02-10 06:22:28 +08:00
|
|
|
CGF.GetAddrOfLocalVar(PVD), /*Index=*/0);
|
2018-01-16 03:06:12 +08:00
|
|
|
InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
|
2019-02-10 06:22:28 +08:00
|
|
|
CGF.GetAddrOfLocalVar(SVD), /*Index=*/0);
|
2018-01-16 03:06:12 +08:00
|
|
|
}
|
2017-12-28 01:58:32 +08:00
|
|
|
|
|
|
|
Action.Enter(CGF);
|
2018-01-13 03:39:11 +08:00
|
|
|
OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
|
2017-12-28 01:58:32 +08:00
|
|
|
BodyGen(CGF);
|
|
|
|
};
|
2019-02-06 00:42:33 +08:00
|
|
|
llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
|
2017-12-28 01:58:32 +08:00
|
|
|
S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
|
|
|
|
Data.NumberOfParts);
|
|
|
|
llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
|
|
|
|
IntegerLiteral IfCond(getContext(), TrueOrFalse,
|
|
|
|
getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
|
|
|
|
SourceLocation());
|
|
|
|
|
2018-08-10 05:08:08 +08:00
|
|
|
CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn,
|
2017-12-28 01:58:32 +08:00
|
|
|
SharedsTy, CapturedStruct, &IfCond, Data);
|
|
|
|
}
|
|
|
|
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
|
|
|
|
// Emit outlined function for task construct.
|
2018-01-13 03:39:11 +08:00
|
|
|
const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
|
2018-04-14 01:31:06 +08:00
|
|
|
Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
|
|
|
|
QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
const Expr *IfCond = nullptr;
|
2015-09-03 16:45:56 +08:00
|
|
|
for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
|
|
|
|
if (C->getNameModifier() == OMPD_unknown ||
|
|
|
|
C->getNameModifier() == OMPD_task) {
|
|
|
|
IfCond = C->getCondition();
|
|
|
|
break;
|
|
|
|
}
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
}
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
|
2016-04-28 17:23:51 +08:00
|
|
|
OMPTaskDataTy Data;
|
|
|
|
// Check if we should emit tied or untied task.
|
|
|
|
Data.Tied = !S.getSingleClause<OMPUntiedClause>();
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
|
|
|
|
CGF.EmitStmt(CS->getCapturedStmt());
|
|
|
|
};
|
2016-04-28 17:23:51 +08:00
|
|
|
auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
|
2019-02-06 00:42:33 +08:00
|
|
|
IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
|
2016-04-28 17:23:51 +08:00
|
|
|
const OMPTaskDataTy &Data) {
|
2018-08-10 05:08:08 +08:00
|
|
|
CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn,
|
2016-04-28 17:23:51 +08:00
|
|
|
SharedsTy, CapturedStruct, IfCond,
|
|
|
|
Data);
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
};
|
2018-01-13 03:39:11 +08:00
|
|
|
EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
|
2014-07-11 19:25:16 +08:00
|
|
|
}
|
|
|
|
|
2015-02-05 13:57:51 +08:00
|
|
|
void CodeGenFunction::EmitOMPTaskyieldDirective(
|
|
|
|
const OMPTaskyieldDirective &S) {
|
2018-08-10 05:08:08 +08:00
|
|
|
CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc());
|
2014-07-18 15:47:19 +08:00
|
|
|
}
|
|
|
|
|
2014-12-05 12:09:23 +08:00
|
|
|
void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
|
2018-08-10 05:08:08 +08:00
|
|
|
CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier);
|
2014-07-18 17:11:51 +08:00
|
|
|
}
|
|
|
|
|
2015-04-27 13:22:09 +08:00
|
|
|
void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
|
2018-08-10 05:08:08 +08:00
|
|
|
CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc());
|
2014-07-18 18:17:07 +08:00
|
|
|
}
|
|
|
|
|
2015-06-18 20:14:09 +08:00
|
|
|
void CodeGenFunction::EmitOMPTaskgroupDirective(
|
|
|
|
const OMPTaskgroupDirective &S) {
|
2016-03-29 13:34:15 +08:00
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
|
|
|
|
Action.Enter(CGF);
|
2017-07-25 23:53:26 +08:00
|
|
|
if (const Expr *E = S.getReductionRef()) {
|
|
|
|
SmallVector<const Expr *, 4> LHSs;
|
|
|
|
SmallVector<const Expr *, 4> RHSs;
|
|
|
|
OMPTaskDataTy Data;
|
|
|
|
for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
|
|
|
|
auto IPriv = C->privates().begin();
|
|
|
|
auto IRed = C->reduction_ops().begin();
|
|
|
|
auto ILHS = C->lhs_exprs().begin();
|
|
|
|
auto IRHS = C->rhs_exprs().begin();
|
2018-04-14 01:31:06 +08:00
|
|
|
for (const Expr *Ref : C->varlists()) {
|
2017-07-25 23:53:26 +08:00
|
|
|
Data.ReductionVars.emplace_back(Ref);
|
|
|
|
Data.ReductionCopies.emplace_back(*IPriv);
|
|
|
|
Data.ReductionOps.emplace_back(*IRed);
|
|
|
|
LHSs.emplace_back(*ILHS);
|
|
|
|
RHSs.emplace_back(*IRHS);
|
|
|
|
std::advance(IPriv, 1);
|
|
|
|
std::advance(IRed, 1);
|
|
|
|
std::advance(ILHS, 1);
|
|
|
|
std::advance(IRHS, 1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
llvm::Value *ReductionDesc =
|
2018-08-10 05:08:08 +08:00
|
|
|
CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(),
|
2017-07-25 23:53:26 +08:00
|
|
|
LHSs, RHSs, Data);
|
|
|
|
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
|
|
|
|
CGF.EmitVarDecl(*VD);
|
|
|
|
CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
|
|
|
|
/*Volatile=*/false, E->getType());
|
|
|
|
}
|
2018-01-13 03:39:11 +08:00
|
|
|
CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
|
2015-06-18 20:14:09 +08:00
|
|
|
};
|
2018-01-13 03:39:11 +08:00
|
|
|
OMPLexicalScope Scope(*this, S, OMPD_unknown);
|
2018-08-10 05:08:08 +08:00
|
|
|
CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc());
|
2015-06-18 20:14:09 +08:00
|
|
|
}
|
|
|
|
|
2014-11-20 12:34:54 +08:00
|
|
|
void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
|
2018-04-14 01:31:06 +08:00
|
|
|
CGM.getOpenMPRuntime().emitFlush(
|
|
|
|
*this,
|
|
|
|
[&S]() -> ArrayRef<const Expr *> {
|
|
|
|
if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>())
|
|
|
|
return llvm::makeArrayRef(FlushClause->varlist_begin(),
|
|
|
|
FlushClause->varlist_end());
|
|
|
|
return llvm::None;
|
|
|
|
}(),
|
2018-08-10 05:08:08 +08:00
|
|
|
S.getBeginLoc());
|
2014-07-21 19:26:11 +08:00
|
|
|
}
|
|
|
|
|
2017-04-26 01:52:12 +08:00
|
|
|
void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
|
|
|
|
const CodeGenLoopTy &CodeGenLoop,
|
|
|
|
Expr *IncExpr) {
|
2016-03-08 00:04:49 +08:00
|
|
|
// Emit the loop iteration variable.
|
2018-04-14 01:31:06 +08:00
|
|
|
const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
|
|
|
|
const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
|
2016-03-08 00:04:49 +08:00
|
|
|
EmitVarDecl(*IVDecl);
|
|
|
|
|
|
|
|
// Emit the iterations count variable.
|
|
|
|
// If it is not a variable, Sema decided to calculate iterations count on each
|
|
|
|
// iteration (e.g., it is foldable into a constant).
|
2018-04-14 01:31:06 +08:00
|
|
|
if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
|
2016-03-08 00:04:49 +08:00
|
|
|
EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
|
|
|
|
// Emit calculation of the iterations count.
|
|
|
|
EmitIgnoredExpr(S.getCalcLastIteration());
|
|
|
|
}
|
|
|
|
|
2018-04-14 01:31:06 +08:00
|
|
|
CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
|
2016-03-08 00:04:49 +08:00
|
|
|
|
2017-01-04 02:24:42 +08:00
|
|
|
bool HasLastprivateClause = false;
|
2016-03-08 00:04:49 +08:00
|
|
|
// Check pre-condition.
|
|
|
|
{
|
2016-03-29 16:58:54 +08:00
|
|
|
OMPLoopScope PreInitScope(*this, S);
|
2016-03-08 00:04:49 +08:00
|
|
|
// Skip the entire loop if we don't meet the precondition.
|
|
|
|
// If the condition constant folds and can be elided, avoid emitting the
|
|
|
|
// whole loop.
|
|
|
|
bool CondConstant;
|
|
|
|
llvm::BasicBlock *ContBlock = nullptr;
|
|
|
|
if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
|
|
|
|
if (!CondConstant)
|
|
|
|
return;
|
|
|
|
} else {
|
2018-04-14 01:31:06 +08:00
|
|
|
llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
|
2016-03-08 00:04:49 +08:00
|
|
|
ContBlock = createBasicBlock("omp.precond.end");
|
|
|
|
emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
|
|
|
|
getProfileCount(&S));
|
|
|
|
EmitBlock(ThenBlock);
|
|
|
|
incrementProfileCounter(&S);
|
|
|
|
}
|
|
|
|
|
2017-12-04 23:38:33 +08:00
|
|
|
emitAlignedClause(*this, S);
|
2016-03-08 00:04:49 +08:00
|
|
|
// Emit 'then' code.
|
|
|
|
{
|
|
|
|
// Emit helper vars inits.
|
2017-04-26 01:52:12 +08:00
|
|
|
|
|
|
|
LValue LB = EmitOMPHelperVar(
|
|
|
|
*this, cast<DeclRefExpr>(
|
|
|
|
(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
|
|
|
|
? S.getCombinedLowerBoundVariable()
|
|
|
|
: S.getLowerBoundVariable())));
|
|
|
|
LValue UB = EmitOMPHelperVar(
|
|
|
|
*this, cast<DeclRefExpr>(
|
|
|
|
(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
|
|
|
|
? S.getCombinedUpperBoundVariable()
|
|
|
|
: S.getUpperBoundVariable())));
|
2016-03-08 00:04:49 +08:00
|
|
|
LValue ST =
|
|
|
|
EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
|
|
|
|
LValue IL =
|
|
|
|
EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
|
|
|
|
|
|
|
|
OMPPrivateScope LoopScope(*this);
|
2017-01-04 02:24:42 +08:00
|
|
|
if (EmitOMPFirstprivateClause(S, LoopScope)) {
|
2017-12-04 23:38:33 +08:00
|
|
|
// Emit implicit barrier to synchronize threads and avoid data races
|
|
|
|
// on initialization of firstprivate variables and post-update of
|
2017-01-04 02:24:42 +08:00
|
|
|
// lastprivate variables.
|
|
|
|
CGM.getOpenMPRuntime().emitBarrierCall(
|
2018-08-10 05:08:08 +08:00
|
|
|
*this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
|
2017-12-04 23:38:33 +08:00
|
|
|
/*ForceSimpleCall=*/true);
|
2017-01-04 02:24:42 +08:00
|
|
|
}
|
|
|
|
EmitOMPPrivateClause(S, LoopScope);
|
2017-12-04 23:38:33 +08:00
|
|
|
if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
|
2017-12-06 22:31:09 +08:00
|
|
|
!isOpenMPParallelDirective(S.getDirectiveKind()) &&
|
|
|
|
!isOpenMPTeamsDirective(S.getDirectiveKind()))
|
2017-12-04 23:38:33 +08:00
|
|
|
EmitOMPReductionClauseInit(S, LoopScope);
|
2017-01-04 02:24:42 +08:00
|
|
|
HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
|
2016-04-22 11:56:56 +08:00
|
|
|
EmitOMPPrivateLoopCounters(S, LoopScope);
|
2016-03-08 00:04:49 +08:00
|
|
|
(void)LoopScope.Privatize();
|
2018-10-30 23:50:12 +08:00
|
|
|
if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
|
|
|
|
CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
|
2016-03-08 00:04:49 +08:00
|
|
|
|
|
|
|
// Detect the distribute schedule kind and chunk.
|
|
|
|
llvm::Value *Chunk = nullptr;
|
|
|
|
OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
|
2018-04-14 01:31:06 +08:00
|
|
|
if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
|
2016-03-08 00:04:49 +08:00
|
|
|
ScheduleKind = C->getDistScheduleKind();
|
2018-04-14 01:31:06 +08:00
|
|
|
if (const Expr *Ch = C->getChunkSize()) {
|
2016-03-08 00:04:49 +08:00
|
|
|
Chunk = EmitScalarExpr(Ch);
|
|
|
|
Chunk = EmitScalarConversion(Chunk, Ch->getType(),
|
2017-12-04 23:38:33 +08:00
|
|
|
S.getIterationVariable()->getType(),
|
2018-08-10 05:08:08 +08:00
|
|
|
S.getBeginLoc());
|
2016-03-08 00:04:49 +08:00
|
|
|
}
|
2018-09-28 03:22:56 +08:00
|
|
|
} else {
|
|
|
|
// Default behaviour for dist_schedule clause.
|
|
|
|
CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
|
|
|
|
*this, S, ScheduleKind, Chunk);
|
2016-03-08 00:04:49 +08:00
|
|
|
}
|
|
|
|
const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
|
|
|
|
const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
|
|
|
|
|
|
|
|
// OpenMP [2.10.8, distribute Construct, Description]
|
|
|
|
// If dist_schedule is specified, kind must be static. If specified,
|
|
|
|
// iterations are divided into chunks of size chunk_size, chunks are
|
|
|
|
// assigned to the teams of the league in a round-robin fashion in the
|
|
|
|
// order of the team number. When no chunk_size is specified, the
|
|
|
|
// iteration space is divided into chunks that are approximately equal
|
|
|
|
// in size, and at most one chunk is distributed to each team of the
|
|
|
|
// league. The size of the chunks is unspecified in this case.
|
2018-10-29 23:45:47 +08:00
|
|
|
bool StaticChunked = RT.isStaticChunked(
|
|
|
|
ScheduleKind, /* Chunked */ Chunk != nullptr) &&
|
|
|
|
isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
|
2016-03-08 00:04:49 +08:00
|
|
|
if (RT.isStaticNonchunked(ScheduleKind,
|
2018-10-29 23:45:47 +08:00
|
|
|
/* Chunked */ Chunk != nullptr) ||
|
|
|
|
StaticChunked) {
|
2017-12-04 23:38:33 +08:00
|
|
|
if (isOpenMPSimdDirective(S.getDirectiveKind()))
|
|
|
|
EmitOMPSimdInit(S, /*IsMonotonic=*/true);
|
2017-08-15 01:56:13 +08:00
|
|
|
CGOpenMPRuntime::StaticRTInput StaticInit(
|
|
|
|
IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
|
2018-10-29 23:45:47 +08:00
|
|
|
LB.getAddress(), UB.getAddress(), ST.getAddress(),
|
|
|
|
StaticChunked ? Chunk : nullptr);
|
2018-08-10 05:08:08 +08:00
|
|
|
RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
|
2017-08-15 01:56:13 +08:00
|
|
|
StaticInit);
|
2018-04-14 01:31:06 +08:00
|
|
|
JumpDest LoopExit =
|
2016-03-08 00:04:49 +08:00
|
|
|
getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
|
|
|
|
// UB = min(UB, GlobalUB);
|
2017-04-26 01:52:12 +08:00
|
|
|
EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
|
|
|
|
? S.getCombinedEnsureUpperBound()
|
|
|
|
: S.getEnsureUpperBound());
|
2016-03-08 00:04:49 +08:00
|
|
|
// IV = LB;
|
2017-04-26 01:52:12 +08:00
|
|
|
EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
|
|
|
|
? S.getCombinedInit()
|
|
|
|
: S.getInit());
|
|
|
|
|
2018-04-14 01:31:06 +08:00
|
|
|
const Expr *Cond =
|
|
|
|
isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
|
|
|
|
? S.getCombinedCond()
|
|
|
|
: S.getCond();
|
2017-04-26 01:52:12 +08:00
|
|
|
|
2018-10-29 23:45:47 +08:00
|
|
|
if (StaticChunked)
|
|
|
|
Cond = S.getCombinedDistCond();
|
|
|
|
|
|
|
|
// For static unchunked schedules generate:
|
|
|
|
//
|
|
|
|
// 1. For distribute alone, codegen
|
|
|
|
// while (idx <= UB) {
|
|
|
|
// BODY;
|
|
|
|
// ++idx;
|
|
|
|
// }
|
|
|
|
//
|
|
|
|
// 2. When combined with 'for' (e.g. as in 'distribute parallel for')
|
|
|
|
// while (idx <= UB) {
|
|
|
|
// <CodeGen rest of pragma>(LB, UB);
|
|
|
|
// idx += ST;
|
|
|
|
// }
|
|
|
|
//
|
|
|
|
// For static chunk one schedule generate:
|
|
|
|
//
|
|
|
|
// while (IV <= GlobalUB) {
|
|
|
|
// <CodeGen rest of pragma>(LB, UB);
|
|
|
|
// LB += ST;
|
|
|
|
// UB += ST;
|
|
|
|
// UB = min(UB, GlobalUB);
|
|
|
|
// IV = LB;
|
|
|
|
// }
|
|
|
|
//
|
2017-04-26 01:52:12 +08:00
|
|
|
EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
|
|
|
|
[&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
|
|
|
|
CodeGenLoop(CGF, S, LoopExit);
|
2016-03-08 00:04:49 +08:00
|
|
|
},
|
2018-10-29 23:45:47 +08:00
|
|
|
[&S, StaticChunked](CodeGenFunction &CGF) {
|
|
|
|
if (StaticChunked) {
|
|
|
|
CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
|
|
|
|
CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
|
|
|
|
CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
|
|
|
|
CGF.EmitIgnoredExpr(S.getCombinedInit());
|
|
|
|
}
|
|
|
|
});
|
2016-03-08 00:04:49 +08:00
|
|
|
EmitBlock(LoopExit.getBlock());
|
|
|
|
// Tell the runtime we are done.
|
2018-08-10 05:08:08 +08:00
|
|
|
RT.emitForStaticFinish(*this, S.getBeginLoc(), S.getDirectiveKind());
|
2016-03-08 00:04:49 +08:00
|
|
|
} else {
|
|
|
|
// Emit the outer loop, which requests its work chunk [LB..UB] from
|
|
|
|
// runtime and runs the inner loop to process it.
|
2017-04-26 01:52:12 +08:00
|
|
|
const OMPLoopArguments LoopArguments = {
|
|
|
|
LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
|
|
|
|
Chunk};
|
|
|
|
EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
|
|
|
|
CodeGenLoop);
|
2016-03-08 00:04:49 +08:00
|
|
|
}
|
2017-12-04 23:38:33 +08:00
|
|
|
if (isOpenMPSimdDirective(S.getDirectiveKind())) {
|
2018-04-14 01:31:06 +08:00
|
|
|
EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
|
2017-12-04 23:38:33 +08:00
|
|
|
return CGF.Builder.CreateIsNotNull(
|
2018-08-10 05:08:08 +08:00
|
|
|
CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
|
2017-12-04 23:38:33 +08:00
|
|
|
});
|
|
|
|
}
|
2018-02-23 03:38:14 +08:00
|
|
|
if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
|
|
|
|
!isOpenMPParallelDirective(S.getDirectiveKind()) &&
|
|
|
|
!isOpenMPTeamsDirective(S.getDirectiveKind())) {
|
2018-10-03 03:12:47 +08:00
|
|
|
EmitOMPReductionClauseFinal(S, OMPD_simd);
|
2018-02-23 03:38:14 +08:00
|
|
|
// Emit post-update of the reduction variables if IsLastIter != 0.
|
|
|
|
emitPostUpdateForReductionClause(
|
2018-04-14 01:31:06 +08:00
|
|
|
*this, S, [IL, &S](CodeGenFunction &CGF) {
|
2018-02-23 03:38:14 +08:00
|
|
|
return CGF.Builder.CreateIsNotNull(
|
2018-08-10 05:08:08 +08:00
|
|
|
CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
|
2018-02-23 03:38:14 +08:00
|
|
|
});
|
2017-12-04 23:38:33 +08:00
|
|
|
}
|
2017-01-04 02:24:42 +08:00
|
|
|
// Emit final copy of the lastprivate variables if IsLastIter != 0.
|
2017-12-04 23:38:33 +08:00
|
|
|
if (HasLastprivateClause) {
|
2017-01-04 02:24:42 +08:00
|
|
|
EmitOMPLastprivateClauseFinal(
|
|
|
|
S, /*NoFinals=*/false,
|
2018-08-10 05:08:08 +08:00
|
|
|
Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
|
2017-12-04 23:38:33 +08:00
|
|
|
}
|
2016-03-08 00:04:49 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// We're now done with the loop, so jump to the continuation block.
|
|
|
|
if (ContBlock) {
|
|
|
|
EmitBranch(ContBlock);
|
|
|
|
EmitBlock(ContBlock, true);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-12-14 22:51:25 +08:00
|
|
|
void CodeGenFunction::EmitOMPDistributeDirective(
|
|
|
|
const OMPDistributeDirective &S) {
|
2016-03-29 13:34:15 +08:00
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
|
2017-04-26 01:52:12 +08:00
|
|
|
CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
|
2016-03-08 00:04:49 +08:00
|
|
|
};
|
2018-01-13 03:39:11 +08:00
|
|
|
OMPLexicalScope Scope(*this, S, OMPD_unknown);
|
2017-11-28 00:54:08 +08:00
|
|
|
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
|
2015-12-14 22:51:25 +08:00
|
|
|
}
|
|
|
|
|
2015-09-29 11:48:57 +08:00
|
|
|
/// Outlines the captured statement \p S into a separate function using a
/// fresh CodeGenFunction and marks the result as non-recursing.
///
/// \returns the outlined function.
static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
                                                   const CapturedStmt *S) {
  CodeGenFunction OutlinedCGF(CGM, /*suppressNewContext=*/true);
  CodeGenFunction::CGCapturedStmtInfo CapturedInfo;
  OutlinedCGF.CapturedStmtInfo = &CapturedInfo;

  llvm::Function *Outlined = OutlinedCGF.GenerateOpenMPCapturedStmtFunction(*S);
  Outlined->setDoesNotRecurse();
  return Outlined;
}
|
|
|
|
|
2015-04-22 19:15:40 +08:00
|
|
|
void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
|
2018-01-13 03:39:11 +08:00
|
|
|
if (S.hasClausesOfKind<OMPDependClause>()) {
|
|
|
|
assert(!S.getAssociatedStmt() &&
|
|
|
|
"No associated statement must be in ordered depend construct.");
|
2016-05-25 20:36:08 +08:00
|
|
|
for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
|
|
|
|
CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
|
2015-12-18 15:58:25 +08:00
|
|
|
return;
|
2016-05-25 20:36:08 +08:00
|
|
|
}
|
2018-04-14 01:31:06 +08:00
|
|
|
const auto *C = S.getSingleClause<OMPSIMDClause>();
|
2016-03-29 13:34:15 +08:00
|
|
|
auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
|
|
|
|
PrePostActionTy &Action) {
|
2018-01-13 03:39:11 +08:00
|
|
|
const CapturedStmt *CS = S.getInnermostCapturedStmt();
|
2015-09-29 11:48:57 +08:00
|
|
|
if (C) {
|
|
|
|
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
|
|
|
|
CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
|
2018-04-14 01:31:06 +08:00
|
|
|
llvm::Function *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
|
2018-08-10 05:08:08 +08:00
|
|
|
CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
|
2017-08-14 23:01:03 +08:00
|
|
|
OutlinedFn, CapturedVars);
|
2015-09-29 11:48:57 +08:00
|
|
|
} else {
|
2016-03-29 13:34:15 +08:00
|
|
|
Action.Enter(CGF);
|
2018-01-13 03:39:11 +08:00
|
|
|
CGF.EmitStmt(CS->getCapturedStmt());
|
2015-09-29 11:48:57 +08:00
|
|
|
}
|
2015-04-22 19:15:40 +08:00
|
|
|
};
|
2018-01-13 03:39:11 +08:00
|
|
|
OMPLexicalScope Scope(*this, S, OMPD_unknown);
|
2018-08-10 05:08:08 +08:00
|
|
|
CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getBeginLoc(), !C);
|
2014-07-22 14:45:04 +08:00
|
|
|
}
|
|
|
|
|
2015-01-22 14:17:56 +08:00
|
|
|
/// Convert \p Val (a scalar or complex rvalue of type \p SrcType) to a scalar
/// value of type \p DestType.
///
/// \param Loc Source location forwarded to the conversion routines.
/// \returns the converted scalar value.
static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
                                         QualType SrcType, QualType DestType,
                                         SourceLocation Loc) {
  assert(CGF.hasScalarEvaluationKind(DestType) &&
         "DestType must have scalar evaluation kind.");
  assert(!Val.isAggregate() && "Must be a scalar or complex.");

  // Scalar source: plain scalar-to-scalar conversion.
  if (Val.isScalar())
    return CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
                                    Loc);

  // Otherwise the source is complex.
  return CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
                                           DestType, Loc);
}
|
|
|
|
|
|
|
|
/// Convert \p Val (a scalar or complex rvalue of type \p SrcType) to a complex
/// value of type \p DestType.
///
/// A scalar source is converted to the destination element type and paired
/// with a null value of the same LLVM type as the second component. A complex
/// source has both components converted element-wise.
///
/// \param Loc Source location forwarded to the conversion routines.
/// \returns the resulting pair of values.
static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
                      QualType DestType, SourceLocation Loc) {
  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
         "DestType must have complex evaluation kind.");

  // Both paths convert to the element type of the destination complex type.
  const QualType DestElemTy =
      DestType->castAs<ComplexType>()->getElementType();

  if (Val.isScalar()) {
    // Convert the input element to the element type of the complex.
    llvm::Value *Real = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
                                                 DestElemTy, Loc);
    llvm::Value *Imag = llvm::Constant::getNullValue(Real->getType());
    return CodeGenFunction::ComplexPairTy(Real, Imag);
  }

  assert(Val.isComplex() && "Must be a scalar or complex.");
  const QualType SrcElemTy = SrcType->castAs<ComplexType>()->getElementType();
  // Convert the first component before the second one.
  llvm::Value *Real = CGF.EmitScalarConversion(Val.getComplexVal().first,
                                               SrcElemTy, DestElemTy, Loc);
  llvm::Value *Imag = CGF.EmitScalarConversion(Val.getComplexVal().second,
                                               SrcElemTy, DestElemTy, Loc);
  return CodeGenFunction::ComplexPairTy(Real, Imag);
}
|
|
|
|
|
2015-04-23 14:35:10 +08:00
|
|
|
/// Store \p RVal into \p LVal for an OpenMP atomic construct.
///
/// Global-register lvalues are written with a global-register store; every
/// other lvalue gets an atomic store whose ordering is sequentially consistent
/// when \p IsSeqCst is set and monotonic otherwise.
static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
                                  LValue LVal, RValue RVal) {
  if (LVal.isGlobalReg()) {
    CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
    return;
  }

  const llvm::AtomicOrdering Ordering =
      IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
               : llvm::AtomicOrdering::Monotonic;
  CGF.EmitAtomicStore(RVal, LVal, Ordering, LVal.isVolatile(),
                      /*isInit=*/false);
}
|
|
|
|
|
2016-01-21 20:35:58 +08:00
|
|
|
/// Store \p RVal (of type \p RValTy) into \p LVal, first converting it to the
/// type of \p LVal.
///
/// Only scalar and complex destinations are supported; an aggregate
/// destination is unreachable.
///
/// \param Loc Source location forwarded to the conversion helpers.
void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
                                         QualType RValTy, SourceLocation Loc) {
  const QualType DestTy = LVal.getType();
  switch (getEvaluationKind(DestTy)) {
  case TEK_Scalar: {
    llvm::Value *Converted =
        convertToScalarValue(*this, RVal, RValTy, DestTy, Loc);
    EmitStoreThroughLValue(RValue::get(Converted), LVal);
    return;
  }
  case TEK_Complex: {
    const ComplexPairTy Converted =
        convertToComplexValue(*this, RVal, RValTy, DestTy, Loc);
    EmitStoreOfComplex(Converted, LVal, /*isInit=*/false);
    return;
  }
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}
|
|
|
|
|
2018-04-14 01:31:06 +08:00
|
|
|
/// Emit code for 'omp atomic read', i.e. 'v = x;'.
///
/// \p X is loaded (with a plain load for a global-register lvalue, atomically
/// otherwise), a flush is emitted when \p IsSeqCst is set, and the loaded
/// value is then stored into \p V.
static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");

  // The lvalue of 'x' is emitted before the lvalue of 'v'.
  LValue Src = CGF.EmitLValue(X);
  LValue Dest = CGF.EmitLValue(V);

  const llvm::AtomicOrdering Ordering =
      IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
               : llvm::AtomicOrdering::Monotonic;
  RValue Loaded = Src.isGlobalReg()
                      ? CGF.EmitLoadOfLValue(Src, Loc)
                      : CGF.EmitAtomicLoad(Src, Loc, Ordering,
                                           Src.isVolatile());

  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);

  const QualType LoadedTy = X->getType().getNonReferenceType();
  CGF.emitOMPSimpleStore(Dest, Loaded, LoadedTy, Loc);
}
|
|
|
|
|
2018-04-14 01:31:06 +08:00
|
|
|
/// Emit code for 'omp atomic write', i.e. 'x = expr;'.
///
/// The value of \p E is stored into the lvalue \p X via
/// emitSimpleAtomicStore, and a flush is emitted afterwards when \p IsSeqCst
/// is set.
///
/// \param CGF      Function the code is emitted into.
/// \param IsSeqCst Whether the construct has a 'seq_cst' clause.
/// \param X        Destination; must be an lvalue.
/// \param E        Expression whose value is written.
/// \param Loc      Location used for the implicit flush.
static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                   const Expr *X, const Expr *E,
                                   SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  // Emit both operands in separate statements. When they are evaluated as
  // arguments of a single call their relative order is unspecified, so the
  // order of the emitted code would depend on the host compiler. The lvalue
  // of 'x' goes first, then 'expr'.
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  emitSimpleAtomicStore(CGF, IsSeqCst, XLValue, ExprRValue);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
|
|
|
|
|
2015-05-01 21:59:53 +08:00
|
|
|
static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
|
|
|
|
RValue Update,
|
|
|
|
BinaryOperatorKind BO,
|
|
|
|
llvm::AtomicOrdering AO,
|
|
|
|
bool IsXLHSInRHSPart) {
|
2018-04-14 01:31:06 +08:00
|
|
|
ASTContext &Context = CGF.getContext();
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
// Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
|
2015-03-30 13:20:59 +08:00
|
|
|
// expression is simple and atomic is allowed for the given type for the
|
|
|
|
// target platform.
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
if (BO == BO_Comma || !Update.isScalar() ||
|
2015-05-08 19:47:16 +08:00
|
|
|
!Update.getScalarVal()->getType()->isIntegerTy() ||
|
|
|
|
!X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
|
|
|
|
(Update.getScalarVal()->getType() !=
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
X.getAddress().getElementType())) ||
|
|
|
|
!X.getAddress().getElementType()->isIntegerTy() ||
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
!Context.getTargetInfo().hasBuiltinAtomic(
|
|
|
|
Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
|
2015-04-23 14:35:10 +08:00
|
|
|
return std::make_pair(false, RValue::get(nullptr));
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
|
|
|
|
llvm::AtomicRMWInst::BinOp RMWOp;
|
|
|
|
switch (BO) {
|
|
|
|
case BO_Add:
|
|
|
|
RMWOp = llvm::AtomicRMWInst::Add;
|
|
|
|
break;
|
|
|
|
case BO_Sub:
|
|
|
|
if (!IsXLHSInRHSPart)
|
2015-04-23 14:35:10 +08:00
|
|
|
return std::make_pair(false, RValue::get(nullptr));
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
RMWOp = llvm::AtomicRMWInst::Sub;
|
|
|
|
break;
|
|
|
|
case BO_And:
|
|
|
|
RMWOp = llvm::AtomicRMWInst::And;
|
|
|
|
break;
|
|
|
|
case BO_Or:
|
|
|
|
RMWOp = llvm::AtomicRMWInst::Or;
|
|
|
|
break;
|
|
|
|
case BO_Xor:
|
|
|
|
RMWOp = llvm::AtomicRMWInst::Xor;
|
|
|
|
break;
|
|
|
|
case BO_LT:
|
|
|
|
RMWOp = X.getType()->hasSignedIntegerRepresentation()
|
|
|
|
? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
|
|
|
|
: llvm::AtomicRMWInst::Max)
|
|
|
|
: (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
|
|
|
|
: llvm::AtomicRMWInst::UMax);
|
|
|
|
break;
|
|
|
|
case BO_GT:
|
|
|
|
RMWOp = X.getType()->hasSignedIntegerRepresentation()
|
|
|
|
? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
|
|
|
|
: llvm::AtomicRMWInst::Min)
|
|
|
|
: (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
|
|
|
|
: llvm::AtomicRMWInst::UMin);
|
|
|
|
break;
|
2015-04-23 14:35:10 +08:00
|
|
|
case BO_Assign:
|
|
|
|
RMWOp = llvm::AtomicRMWInst::Xchg;
|
|
|
|
break;
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
case BO_Mul:
|
|
|
|
case BO_Div:
|
|
|
|
case BO_Rem:
|
|
|
|
case BO_Shl:
|
|
|
|
case BO_Shr:
|
|
|
|
case BO_LAnd:
|
|
|
|
case BO_LOr:
|
2015-04-23 14:35:10 +08:00
|
|
|
return std::make_pair(false, RValue::get(nullptr));
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
case BO_PtrMemD:
|
|
|
|
case BO_PtrMemI:
|
|
|
|
case BO_LE:
|
|
|
|
case BO_GE:
|
|
|
|
case BO_EQ:
|
|
|
|
case BO_NE:
|
2017-12-14 23:16:18 +08:00
|
|
|
case BO_Cmp:
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
case BO_AddAssign:
|
|
|
|
case BO_SubAssign:
|
|
|
|
case BO_AndAssign:
|
|
|
|
case BO_OrAssign:
|
|
|
|
case BO_XorAssign:
|
|
|
|
case BO_MulAssign:
|
|
|
|
case BO_DivAssign:
|
|
|
|
case BO_RemAssign:
|
|
|
|
case BO_ShlAssign:
|
|
|
|
case BO_ShrAssign:
|
|
|
|
case BO_Comma:
|
|
|
|
llvm_unreachable("Unsupported atomic update operation");
|
|
|
|
}
|
2018-04-14 01:31:06 +08:00
|
|
|
llvm::Value *UpdateVal = Update.getScalarVal();
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
|
|
|
|
UpdateVal = CGF.Builder.CreateIntCast(
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
IC, X.getAddress().getElementType(),
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
X.getType()->hasSignedIntegerRepresentation());
|
|
|
|
}
|
2018-04-14 01:31:06 +08:00
|
|
|
llvm::Value *Res =
|
|
|
|
CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
|
2015-04-23 14:35:10 +08:00
|
|
|
return std::make_pair(true, RValue::get(Res));
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
}
|
|
|
|
|
2015-04-23 14:35:10 +08:00
|
|
|
std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
|
|
|
|
llvm::AtomicOrdering AO, SourceLocation Loc,
|
2018-04-14 01:31:06 +08:00
|
|
|
const llvm::function_ref<RValue(RValue)> CommonGen) {
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
// Update expressions are allowed to have the following forms:
|
|
|
|
// x binop= expr; -> xrval + expr;
|
|
|
|
// x++, ++x -> xrval + 1;
|
|
|
|
// x--, --x -> xrval - 1;
|
|
|
|
// x = x binop expr; -> xrval binop expr
|
|
|
|
// x = expr Op x; - > expr binop xrval;
|
2015-04-23 14:35:10 +08:00
|
|
|
auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
|
|
|
|
if (!Res.first) {
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
if (X.isGlobalReg()) {
|
|
|
|
// Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
|
|
|
|
// 'xrval'.
|
|
|
|
EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
|
|
|
|
} else {
|
|
|
|
// Perform compare-and-swap procedure.
|
|
|
|
EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
|
2015-03-30 13:20:59 +08:00
|
|
|
}
|
|
|
|
}
|
2015-04-23 14:35:10 +08:00
|
|
|
return Res;
|
2015-03-30 13:20:59 +08:00
|
|
|
}
|
|
|
|
|
2018-04-14 01:31:06 +08:00
|
|
|
/// Emits the code for an 'omp atomic update' construct.
///
/// \param CGF Code generation state used for all emission.
/// \param IsSeqCst True if the construct carries a 'seq_cst' clause; selects
/// sequentially consistent ordering and triggers the trailing flush.
/// \param X The updated 'x' expression; must be an lvalue.
/// \param E The 'expr' operand of the update.
/// \param UE The update expression; after stripping implicit casts it must be
/// a binary operator whose operands are opaque values.
/// \param IsXLHSInRHSPart True if the left operand of \p UE stands for 'x'.
/// \param Loc Source location used for the emitted operations.
static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                    const Expr *X, const Expr *E,
                                    const Expr *UE, bool IsXLHSInRHSPart,
                                    SourceLocation Loc) {
  const Expr *StrippedUE = UE->IgnoreImpCasts();
  assert(isa<BinaryOperator>(StrippedUE) &&
         "Update expr in 'atomic update' must be a binary operator.");
  const auto *UpdateOp = cast<BinaryOperator>(StrippedUE);
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval + expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr
  // x = expr Op x; -> expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");

  // Emit 'x' first and 'expr' second; the order of emission is significant.
  LValue XAddr = CGF.EmitLValue(X);
  RValue ExprVal = CGF.EmitAnyExpr(E);

  llvm::AtomicOrdering Ordering = llvm::AtomicOrdering::Monotonic;
  if (IsSeqCst)
    Ordering = llvm::AtomicOrdering::SequentiallyConsistent;

  // Work out which opaque operand of the update expression stands for 'x'
  // and which one stands for 'expr'.
  const auto *LHSOpaque =
      cast<OpaqueValueExpr>(UpdateOp->getLHS()->IgnoreImpCasts());
  const auto *RHSOpaque =
      cast<OpaqueValueExpr>(UpdateOp->getRHS()->IgnoreImpCasts());
  const OpaqueValueExpr *XOpaque = RHSOpaque;
  const OpaqueValueExpr *ExprOpaque = LHSOpaque;
  if (IsXLHSInRHSPart) {
    XOpaque = LHSOpaque;
    ExprOpaque = RHSOpaque;
  }

  // Callback that evaluates the update expression for a given value of 'x'.
  auto &&EmitUpdate = [&CGF, UE, ExprVal, XOpaque,
                       ExprOpaque](RValue CurrentX) {
    CodeGenFunction::OpaqueValueMapping BindExpr(CGF, ExprOpaque, ExprVal);
    CodeGenFunction::OpaqueValueMapping BindX(CGF, XOpaque, CurrentX);
    return CGF.EmitAnyExpr(UE);
  };
  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(XAddr, ExprVal,
                                          UpdateOp->getOpcode(),
                                          IsXLHSInRHSPart, Ordering, Loc,
                                          EmitUpdate);

  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
|
|
|
|
|
|
|
|
/// Converts \p Value from \p SourceType to \p ResType and wraps the result in
/// an RValue. Only scalar and complex result types are supported; an
/// aggregate result type is treated as unreachable.
static RValue convertToType(CodeGenFunction &CGF, RValue Value,
                            QualType SourceType, QualType ResType,
                            SourceLocation Loc) {
  const TypeEvaluationKind ResultKind = CGF.getEvaluationKind(ResType);
  switch (ResultKind) {
  case TEK_Scalar: {
    llvm::Value *Scalar =
        convertToScalarValue(CGF, Value, SourceType, ResType, Loc);
    return RValue::get(Scalar);
  }
  case TEK_Complex: {
    auto Parts = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
    return RValue::getComplex(Parts.first, Parts.second);
  }
  case TEK_Aggregate:
    // Not supported; fall out of the switch to the unreachable below.
    break;
  }
  llvm_unreachable("Must be a scalar or complex.");
}
|
|
|
|
|
2018-04-14 01:31:06 +08:00
|
|
|
/// Emits the code for an 'omp atomic capture' construct: atomically updates
/// (or overwrites) 'x' and stores the old or new value of 'x' into 'v'.
///
/// \param CGF Code generation state used for all emission.
/// \param IsSeqCst True if the construct carries a 'seq_cst' clause.
/// \param IsPostfixUpdate True if 'v' receives the value of 'x' from before
/// the update, false if it receives the updated value.
/// \param V The capture target 'v'; must be an lvalue.
/// \param X The updated 'x' expression; must be an lvalue.
/// \param E The 'expr' operand.
/// \param UE The update expression, or null when 'x' is simply overwritten
/// with \p E.
/// \param IsXLHSInRHSPart True if the left operand of \p UE stands for 'x'.
/// \param Loc Source location used for the emitted operations.
static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");

  // Value that is eventually stored into 'v', and the type it has.
  RValue CapturedVal;
  QualType CapturedValType;

  // Emission order matters: 'v', then 'x', then 'expr'.
  LValue VAddr = CGF.EmitLValue(V);
  LValue XAddr = CGF.EmitLValue(X);
  RValue ExprVal = CGF.EmitAnyExpr(E);

  llvm::AtomicOrdering Ordering = llvm::AtomicOrdering::Monotonic;
  if (IsSeqCst)
    Ordering = llvm::AtomicOrdering::SequentiallyConsistent;

  if (UE) {
    // 'x' is updated with some additional value.
    const Expr *StrippedUE = UE->IgnoreImpCasts();
    assert(isa<BinaryOperator>(StrippedUE) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    const auto *UpdateOp = cast<BinaryOperator>(StrippedUE);
    // Update expressions are allowed to have the following forms:
    // x binop= expr; -> xrval + expr;
    // x++, ++x -> xrval + 1;
    // x--, --x -> xrval - 1;
    // x = x binop expr; -> xrval binop expr
    // x = expr Op x; -> expr binop xrval;
    const auto *LHSOpaque =
        cast<OpaqueValueExpr>(UpdateOp->getLHS()->IgnoreImpCasts());
    const auto *RHSOpaque =
        cast<OpaqueValueExpr>(UpdateOp->getRHS()->IgnoreImpCasts());
    const OpaqueValueExpr *XOpaque = RHSOpaque;
    const OpaqueValueExpr *ExprOpaque = LHSOpaque;
    if (IsXLHSInRHSPart) {
      XOpaque = LHSOpaque;
      ExprOpaque = RHSOpaque;
    }
    CapturedValType = XOpaque->getType();

    // Callback that evaluates the update for a given value of 'x' and records
    // the value to capture (the old 'x' for postfix, the new one otherwise).
    auto &&EmitUpdate = [&CGF, &CapturedVal, UE, ExprVal, XOpaque, ExprOpaque,
                         IsPostfixUpdate](RValue CurrentX) {
      CodeGenFunction::OpaqueValueMapping BindExpr(CGF, ExprOpaque, ExprVal);
      CodeGenFunction::OpaqueValueMapping BindX(CGF, XOpaque, CurrentX);
      RValue Updated = CGF.EmitAnyExpr(UE);
      CapturedVal = IsPostfixUpdate ? CurrentX : Updated;
      return Updated;
    };
    auto UpdateRes = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XAddr, ExprVal, UpdateOp->getOpcode(), IsXLHSInRHSPart, Ordering, Loc,
        EmitUpdate);
    if (UpdateRes.first) {
      // 'atomicrmw' instruction was generated.
      if (!IsPostfixUpdate) {
        // 'atomicrmw' does not provide new value, so evaluate it using old
        // value of 'x'.
        CodeGenFunction::OpaqueValueMapping BindExpr(CGF, ExprOpaque, ExprVal);
        CodeGenFunction::OpaqueValueMapping BindX(CGF, XOpaque,
                                                  UpdateRes.second);
        CapturedVal = CGF.EmitAnyExpr(UE);
      } else {
        // Use old value from 'atomicrmw'.
        CapturedVal = UpdateRes.second;
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    CapturedValType = X->getType().getNonReferenceType();
    ExprVal = convertToType(CGF, ExprVal, E->getType(),
                            X->getType().getNonReferenceType(), Loc);
    // Callback that captures the current 'x' and yields the replacement.
    auto &&EmitExchange = [&CapturedVal, ExprVal](RValue CurrentX) {
      CapturedVal = CurrentX;
      return ExprVal;
    };
    // Try to perform atomicrmw xchg, otherwise simple exchange.
    auto ExchangeRes = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XAddr, ExprVal, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, Ordering,
        Loc, EmitExchange);
    if (ExchangeRes.first) {
      // 'atomicrmw' instruction was generated.
      CapturedVal = IsPostfixUpdate ? ExchangeRes.second : ExprVal;
    }
  }

  // Emit post-update store to 'v' of old/new 'x' value.
  CGF.emitOMPSimpleStore(VAddr, CapturedVal, CapturedValType, Loc);

  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
|
|
|
|
|
2018-04-14 01:31:06 +08:00
|
|
|
static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
|
2015-04-23 14:35:10 +08:00
|
|
|
bool IsSeqCst, bool IsPostfixUpdate,
|
|
|
|
const Expr *X, const Expr *V, const Expr *E,
|
|
|
|
const Expr *UE, bool IsXLHSInRHSPart,
|
|
|
|
SourceLocation Loc) {
|
2015-01-22 14:17:56 +08:00
|
|
|
switch (Kind) {
|
|
|
|
case OMPC_read:
|
2018-04-14 01:31:06 +08:00
|
|
|
emitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
|
2015-01-22 14:17:56 +08:00
|
|
|
break;
|
|
|
|
case OMPC_write:
|
2018-04-14 01:31:06 +08:00
|
|
|
emitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
|
2015-02-27 14:33:30 +08:00
|
|
|
break;
|
2015-03-30 13:20:59 +08:00
|
|
|
case OMPC_unknown:
|
2015-01-22 14:17:56 +08:00
|
|
|
case OMPC_update:
|
2018-04-14 01:31:06 +08:00
|
|
|
emitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
|
2015-03-30 13:20:59 +08:00
|
|
|
break;
|
2015-01-22 14:17:56 +08:00
|
|
|
case OMPC_capture:
|
2018-04-14 01:31:06 +08:00
|
|
|
emitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
|
2015-04-23 14:35:10 +08:00
|
|
|
IsXLHSInRHSPart, Loc);
|
|
|
|
break;
|
2015-01-22 14:17:56 +08:00
|
|
|
case OMPC_if:
|
|
|
|
case OMPC_final:
|
|
|
|
case OMPC_num_threads:
|
|
|
|
case OMPC_private:
|
|
|
|
case OMPC_firstprivate:
|
|
|
|
case OMPC_lastprivate:
|
|
|
|
case OMPC_reduction:
|
2017-07-19 04:17:46 +08:00
|
|
|
case OMPC_task_reduction:
|
2017-07-22 02:48:21 +08:00
|
|
|
case OMPC_in_reduction:
|
2015-01-22 14:17:56 +08:00
|
|
|
case OMPC_safelen:
|
2015-08-21 19:14:16 +08:00
|
|
|
case OMPC_simdlen:
|
2019-03-13 02:52:33 +08:00
|
|
|
case OMPC_allocator:
|
2019-03-27 22:14:31 +08:00
|
|
|
case OMPC_allocate:
|
2015-01-22 14:17:56 +08:00
|
|
|
case OMPC_collapse:
|
|
|
|
case OMPC_default:
|
|
|
|
case OMPC_seq_cst:
|
|
|
|
case OMPC_shared:
|
|
|
|
case OMPC_linear:
|
|
|
|
case OMPC_aligned:
|
|
|
|
case OMPC_copyin:
|
|
|
|
case OMPC_copyprivate:
|
|
|
|
case OMPC_flush:
|
|
|
|
case OMPC_proc_bind:
|
|
|
|
case OMPC_schedule:
|
|
|
|
case OMPC_ordered:
|
|
|
|
case OMPC_nowait:
|
|
|
|
case OMPC_untied:
|
|
|
|
case OMPC_threadprivate:
|
2015-06-23 22:25:19 +08:00
|
|
|
case OMPC_depend:
|
2015-01-22 14:17:56 +08:00
|
|
|
case OMPC_mergeable:
|
2015-08-08 00:16:36 +08:00
|
|
|
case OMPC_device:
|
2015-09-25 18:37:12 +08:00
|
|
|
case OMPC_threads:
|
2015-09-28 14:39:35 +08:00
|
|
|
case OMPC_simd:
|
2015-11-23 13:32:03 +08:00
|
|
|
case OMPC_map:
|
2015-11-25 04:50:12 +08:00
|
|
|
case OMPC_num_teams:
|
2015-11-28 02:47:36 +08:00
|
|
|
case OMPC_thread_limit:
|
2015-12-01 18:17:31 +08:00
|
|
|
case OMPC_priority:
|
2015-12-07 20:52:51 +08:00
|
|
|
case OMPC_grainsize:
|
2015-12-07 18:51:44 +08:00
|
|
|
case OMPC_nogroup:
|
2015-12-08 20:06:20 +08:00
|
|
|
case OMPC_num_tasks:
|
2015-12-15 16:19:24 +08:00
|
|
|
case OMPC_hint:
|
2016-01-16 02:50:31 +08:00
|
|
|
case OMPC_dist_schedule:
|
2016-01-27 00:37:23 +08:00
|
|
|
case OMPC_defaultmap:
|
2016-04-12 13:28:34 +08:00
|
|
|
case OMPC_uniform:
|
2016-05-27 01:39:58 +08:00
|
|
|
case OMPC_to:
|
2016-05-27 01:49:04 +08:00
|
|
|
case OMPC_from:
|
2016-07-13 23:37:16 +08:00
|
|
|
case OMPC_use_device_ptr:
|
2016-07-14 01:16:49 +08:00
|
|
|
case OMPC_is_device_ptr:
|
2018-09-26 12:28:39 +08:00
|
|
|
case OMPC_unified_address:
|
2018-10-01 22:26:31 +08:00
|
|
|
case OMPC_unified_shared_memory:
|
2018-10-04 04:07:58 +08:00
|
|
|
case OMPC_reverse_offload:
|
2018-10-11 22:41:10 +08:00
|
|
|
case OMPC_dynamic_allocators:
|
2018-11-02 20:18:11 +08:00
|
|
|
case OMPC_atomic_default_mem_order:
|
2019-08-24 00:11:14 +08:00
|
|
|
case OMPC_device_type:
|
2019-09-24 02:13:31 +08:00
|
|
|
case OMPC_match:
|
2015-01-22 14:17:56 +08:00
|
|
|
llvm_unreachable("Clause is not allowed in 'omp atomic'.");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
|
2015-08-30 23:12:28 +08:00
|
|
|
bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
|
2015-01-22 14:17:56 +08:00
|
|
|
OpenMPClauseKind Kind = OMPC_unknown;
|
2018-04-14 01:31:06 +08:00
|
|
|
for (const OMPClause *C : S.clauses()) {
|
2015-01-22 14:17:56 +08:00
|
|
|
// Find first clause (skip seq_cst clause, if it is first).
|
|
|
|
if (C->getClauseKind() != OMPC_seq_cst) {
|
|
|
|
Kind = C->getClauseKind();
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2015-03-11 12:48:56 +08:00
|
|
|
|
2018-04-14 01:31:06 +08:00
|
|
|
const Stmt *CS = S.getInnermostCapturedStmt()->IgnoreContainers();
|
2018-10-31 11:48:47 +08:00
|
|
|
if (const auto *FE = dyn_cast<FullExpr>(CS))
|
|
|
|
enterFullExpression(FE);
|
2015-04-23 14:35:10 +08:00
|
|
|
// Processing for statements under 'atomic capture'.
|
|
|
|
if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
|
2018-04-14 01:31:06 +08:00
|
|
|
for (const Stmt *C : Compound->body()) {
|
2018-10-31 11:48:47 +08:00
|
|
|
if (const auto *FE = dyn_cast<FullExpr>(C))
|
|
|
|
enterFullExpression(FE);
|
2015-04-23 14:35:10 +08:00
|
|
|
}
|
|
|
|
}
|
2015-03-11 12:48:56 +08:00
|
|
|
|
2016-03-29 13:34:15 +08:00
|
|
|
auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
|
|
|
|
PrePostActionTy &) {
|
2015-12-14 17:26:19 +08:00
|
|
|
CGF.EmitStopPoint(CS);
|
2018-04-14 01:31:06 +08:00
|
|
|
emitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
|
2015-04-23 14:35:10 +08:00
|
|
|
S.getV(), S.getExpr(), S.getUpdateExpr(),
|
2018-08-10 05:08:08 +08:00
|
|
|
S.isXLHSInRHSPart(), S.getBeginLoc());
|
2015-04-10 12:50:10 +08:00
|
|
|
};
|
2018-01-13 03:39:11 +08:00
|
|
|
OMPLexicalScope Scope(*this, S, OMPD_unknown);
|
2015-07-03 17:56:58 +08:00
|
|
|
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
|
2014-07-22 18:10:35 +08:00
|
|
|
}
|
|
|
|
|
2017-01-16 23:26:02 +08:00
|
|
|
/// Common emission path for target execution directives.
///
/// When compiling for the device the region is emitted inline. Otherwise the
/// region is outlined into a function via the OpenMP runtime and a target
/// call is emitted, passing along the applicable 'if' condition and the
/// 'device' expression (if any).
///
/// \param CGF Code generation state of the enclosing function.
/// \param S The target execution directive being emitted.
/// \param CodeGen Callback that emits the body of the target region.
static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen) {
  assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
  CodeGenModule &CGM = CGF.CGM;

  // On device emit this construct as inlined code.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    OMPLexicalScope Scope(CGF, S, OMPD_target);
    CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
        });
    return;
  }

  llvm::Function *Fn = nullptr;
  llvm::Constant *FnID = nullptr;

  // Check for the at most one if clause associated with the target region:
  // either one without a name modifier or one naming 'target'.
  const Expr *IfCond = nullptr;
  for (const auto *IfClause : S.getClausesOfKind<OMPIfClause>()) {
    const OpenMPDirectiveKind Modifier = IfClause->getNameModifier();
    if (Modifier != OMPD_unknown && Modifier != OMPD_target)
      continue;
    IfCond = IfClause->getCondition();
    break;
  }

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *DeviceClause = S.getSingleClause<OMPDeviceClause>())
    Device = DeviceClause->getDevice();

  // Check if we have an if clause whose conditional always evaluates to false
  // or if we do not have any targets specified. If so the target region is not
  // an offload entry point.
  bool IsOffloadEntry = !CGM.getLangOpts().OMPTargetTriples.empty();
  if (IfCond) {
    bool CondVal;
    if (CGF.ConstantFoldsToSimpleInteger(IfCond, CondVal) && !CondVal)
      IsOffloadEntry = false;
  }

  assert(CGF.CurFuncDecl && "No parent declaration for target region!");
  // In case we have Ctors/Dtors we use the complete type variant to produce
  // the mangling of the device outlined kernel.
  GlobalDecl ParentGD;
  if (const auto *Ctor = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
    ParentGD = GlobalDecl(Ctor, Ctor_Complete);
  else if (const auto *Dtor = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
    ParentGD = GlobalDecl(Dtor, Dtor_Complete);
  else
    ParentGD = GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl));
  StringRef ParentName = CGM.getMangledName(ParentGD);

  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
                                                    IsOffloadEntry, CodeGen);
  OMPLexicalScope Scope(CGF, S, OMPD_task);

  // Callback producing the loop iteration count as a 64-bit integer, or null
  // when the region is not an offload entry.
  auto &&SizeEmitter =
      [IsOffloadEntry](CodeGenFunction &CGF,
                       const OMPLoopDirective &D) -> llvm::Value * {
    if (!IsOffloadEntry)
      return nullptr;
    // NOTE(review): this creates an unnamed temporary that is destroyed at the
    // end of the statement, so the scope does not cover the code below.
    // Presumably only the constructor's effects are wanted - confirm.
    OMPLoopScope(CGF, D);
    // Emit calculation of the iterations count.
    llvm::Value *IterCount = CGF.EmitScalarExpr(D.getNumIterations());
    return CGF.Builder.CreateIntCast(IterCount, CGF.Int64Ty,
                                     /*isSigned=*/false);
  };
  CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
                                        SizeEmitter);
}
|
|
|
|
|
2017-01-16 23:26:02 +08:00
|
|
|
/// Emits the body of a 'target' region: enters \p Action, privatizes the
/// firstprivate and private variables of \p S, and then emits the statement
/// captured for OMPD_target.
static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
                             PrePostActionTy &Action) {
  Action.Enter(CGF);

  // Set up private copies before emitting the body.
  CodeGenFunction::OMPPrivateScope Privates(CGF);
  (void)CGF.EmitOMPFirstprivateClause(S, Privates);
  CGF.EmitOMPPrivateClause(S, Privates);
  (void)Privates.Privatize();

  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
    CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

  const CapturedStmt *TargetCS = S.getCapturedStmt(OMPD_target);
  CGF.EmitStmt(TargetCS->getCapturedStmt());
}
|
|
|
|
|
|
|
|
/// Emits the outlined device function for the 'target' directive \p S as an
/// offload entry, using \p ParentName as the name of the parent function.
void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
                                                  StringRef ParentName,
                                                  const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  // Initialize the out-parameters so the assertion below never reads an
  // indeterminate pointer if the runtime leaves one of them unassigned.
  llvm::Function *Fn = nullptr;
  llvm::Constant *Addr = nullptr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}
|
|
|
|
|
|
|
|
void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
|
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
|
|
|
|
emitTargetRegion(CGF, S, Action);
|
|
|
|
};
|
|
|
|
emitCommonOMPTargetDirective(*this, S, CodeGen);
|
|
|
|
}
|
|
|
|
|
2016-03-04 04:34:23 +08:00
|
|
|
/// Common emission path for directives with a 'teams' region: outlines the
/// region captured for OMPD_teams, emits the 'num_teams'/'thread_limit'
/// clause when either is present, and then emits the teams call with the
/// captured variables.
///
/// \param CGF Code generation state of the enclosing function.
/// \param S The directive being emitted.
/// \param InnermostKind Directive kind passed to the runtime for the outlined
/// function.
/// \param CodeGen Callback that emits the body of the teams region.
static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        OpenMPDirectiveKind InnermostKind,
                                        const RegionCodeGenTy &CodeGen) {
  auto &RT = CGF.CGM.getOpenMPRuntime();
  const CapturedStmt *TeamsCS = S.getCapturedStmt(OMPD_teams);
  llvm::Function *OutlinedFn = RT.emitTeamsOutlinedFunction(
      S, *TeamsCS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);

  const auto *NumTeamsClause = S.getSingleClause<OMPNumTeamsClause>();
  const auto *ThreadLimitClause = S.getSingleClause<OMPThreadLimitClause>();
  if (NumTeamsClause || ThreadLimitClause) {
    // A missing clause is passed on as a null expression.
    const Expr *NumTeams = nullptr;
    if (NumTeamsClause)
      NumTeams = NumTeamsClause->getNumTeams();
    const Expr *ThreadLimit = nullptr;
    if (ThreadLimitClause)
      ThreadLimit = ThreadLimitClause->getThreadLimit();

    RT.emitNumTeamsClause(CGF, NumTeams, ThreadLimit, S.getBeginLoc());
  }

  OMPTeamsScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*TeamsCS, CapturedVars);
  RT.emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn, CapturedVars);
}
|
|
|
|
|
|
|
|
void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
|
2016-12-16 01:55:32 +08:00
|
|
|
// Emit teams region as a standalone region.
|
2018-03-16 02:10:54 +08:00
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
|
2018-03-20 22:45:59 +08:00
|
|
|
Action.Enter(CGF);
|
2016-03-04 04:34:23 +08:00
|
|
|
OMPPrivateScope PrivateScope(CGF);
|
2016-03-04 06:09:40 +08:00
|
|
|
(void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
|
|
|
|
CGF.EmitOMPPrivateClause(S, PrivateScope);
|
2017-02-17 00:48:49 +08:00
|
|
|
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
|
2016-03-04 04:34:23 +08:00
|
|
|
(void)PrivateScope.Privatize();
|
2018-01-13 03:39:11 +08:00
|
|
|
CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
|
2017-02-17 00:48:49 +08:00
|
|
|
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
|
2016-03-04 04:34:23 +08:00
|
|
|
};
|
2017-11-17 02:20:21 +08:00
|
|
|
emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
|
2018-04-14 01:31:06 +08:00
|
|
|
emitPostUpdateForReductionClause(*this, S,
|
|
|
|
[](CodeGenFunction &) { return nullptr; });
|
2014-10-09 12:18:56 +08:00
|
|
|
}
|
2015-07-01 14:57:41 +08:00
|
|
|
|
2017-01-25 10:18:43 +08:00
|
|
|
/// Emits the body of a 'target teams' region: enters \p Action and emits the
/// teams region, whose body privatizes firstprivate, private and reduction
/// variables around the statement captured for OMPD_teams.
static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
                                  const OMPTargetTeamsDirective &S) {
  const CapturedStmt *TeamsCS = S.getCapturedStmt(OMPD_teams);
  Action.Enter(CGF);
  // Emit teams region as a standalone region.
  auto &&TeamsBodyGen = [&S, TeamsCS](CodeGenFunction &CGF,
                                      PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope Privates(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, Privates);
    CGF.EmitOMPPrivateClause(S, Privates);
    CGF.EmitOMPReductionClauseInit(S, Privates);
    (void)Privates.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
    CGF.EmitStmt(TeamsCS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, TeamsBodyGen);
  // No post-update condition is supplied.
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
|
|
|
|
|
|
|
|
/// Emits the outlined device function for the 'target teams' directive \p S
/// as an offload entry, using \p ParentName as the name of the parent
/// function.
void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  // Initialize the out-parameters so the assertion below never reads an
  // indeterminate pointer if the runtime leaves one of them unassigned.
  llvm::Function *Fn = nullptr;
  llvm::Constant *Addr = nullptr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}
|
|
|
|
|
|
|
|
void CodeGenFunction::EmitOMPTargetTeamsDirective(
|
|
|
|
const OMPTargetTeamsDirective &S) {
|
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
|
|
|
|
emitTargetTeamsRegion(CGF, Action, S);
|
|
|
|
};
|
|
|
|
emitCommonOMPTargetDirective(*this, S, CodeGen);
|
|
|
|
}
|
|
|
|
|
2017-12-08 23:03:50 +08:00
|
|
|
static void
|
|
|
|
emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
|
|
|
|
const OMPTargetTeamsDistributeDirective &S) {
|
|
|
|
Action.Enter(CGF);
|
|
|
|
auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
|
|
|
|
CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
|
|
|
|
};
|
|
|
|
|
|
|
|
// Emit teams region as a standalone region.
|
|
|
|
auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
|
2018-03-16 02:10:54 +08:00
|
|
|
PrePostActionTy &Action) {
|
2018-03-20 22:45:59 +08:00
|
|
|
Action.Enter(CGF);
|
2017-12-08 23:03:50 +08:00
|
|
|
CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
|
|
|
|
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
|
|
|
|
(void)PrivateScope.Privatize();
|
|
|
|
CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
|
|
|
|
CodeGenDistribute);
|
|
|
|
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
|
|
|
|
};
|
|
|
|
emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
|
|
|
|
emitPostUpdateForReductionClause(CGF, S,
|
|
|
|
[](CodeGenFunction &) { return nullptr; });
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Emits the outlined device function for the 'target teams distribute'
/// directive \p S as an offload entry, using \p ParentName as the name of the
/// parent function.
void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeRegion(CGF, Action, S);
  };
  // Initialize the out-parameters so the assertion below never reads an
  // indeterminate pointer if the runtime leaves one of them unassigned.
  llvm::Function *Fn = nullptr;
  llvm::Constant *Addr = nullptr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}
|
|
|
|
|
|
|
|
void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
|
|
|
|
const OMPTargetTeamsDistributeDirective &S) {
|
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
|
|
|
|
emitTargetTeamsDistributeRegion(CGF, Action, S);
|
|
|
|
};
|
|
|
|
emitCommonOMPTargetDirective(*this, S, CodeGen);
|
|
|
|
}
|
|
|
|
|
2017-12-14 03:45:06 +08:00
|
|
|
/// Emits the body of a 'target teams distribute simd' region: enters
/// \p Action and emits the teams region, whose body wraps an inlined
/// 'distribute' loop in reduction initialization and finalization.
static void emitTargetTeamsDistributeSimdRegion(
    CodeGenFunction &CGF, PrePostActionTy &Action,
    const OMPTargetTeamsDistributeSimdDirective &S) {
  Action.Enter(CGF);
  // Generator for the distribute loop itself.
  auto &&DistributeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&TeamsBodyGen = [&S, &DistributeGen](CodeGenFunction &CGF,
                                             PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope Privates(CGF);
    CGF.EmitOMPReductionClauseInit(S, Privates);
    (void)Privates.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    DistributeGen);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, TeamsBodyGen);
  // No post-update condition is supplied.
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
|
|
|
|
|
|
|
|
/// Emits the outlined device function for the 'target teams distribute simd'
/// directive \p S as an offload entry, using \p ParentName as the name of the
/// parent function.
void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
  };
  // Initialize the out-parameters so the assertion below never reads an
  // indeterminate pointer if the runtime leaves one of them unassigned.
  llvm::Function *Fn = nullptr;
  llvm::Constant *Addr = nullptr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}
|
|
|
|
|
|
|
|
void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
|
|
|
|
const OMPTargetTeamsDistributeSimdDirective &S) {
|
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
|
|
|
|
emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
|
|
|
|
};
|
|
|
|
emitCommonOMPTargetDirective(*this, S, CodeGen);
|
|
|
|
}
|
|
|
|
|
2017-10-04 22:12:09 +08:00
|
|
|
void CodeGenFunction::EmitOMPTeamsDistributeDirective(
|
|
|
|
const OMPTeamsDistributeDirective &S) {
|
|
|
|
|
|
|
|
auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
|
|
|
|
CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
|
|
|
|
};
|
|
|
|
|
|
|
|
// Emit teams region as a standalone region.
|
|
|
|
auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
|
2018-03-16 02:10:54 +08:00
|
|
|
PrePostActionTy &Action) {
|
2018-03-20 22:45:59 +08:00
|
|
|
Action.Enter(CGF);
|
2017-10-04 22:12:09 +08:00
|
|
|
OMPPrivateScope PrivateScope(CGF);
|
|
|
|
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
|
|
|
|
(void)PrivateScope.Privatize();
|
|
|
|
CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
|
|
|
|
CodeGenDistribute);
|
|
|
|
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
|
|
|
|
};
|
2017-11-29 23:14:16 +08:00
|
|
|
emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
|
2017-10-04 22:12:09 +08:00
|
|
|
emitPostUpdateForReductionClause(*this, S,
|
|
|
|
[](CodeGenFunction &) { return nullptr; });
|
2017-12-06 22:31:09 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
|
|
|
|
const OMPTeamsDistributeSimdDirective &S) {
|
|
|
|
auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
|
|
|
|
CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
|
|
|
|
};
|
|
|
|
|
|
|
|
// Emit teams region as a standalone region.
|
|
|
|
auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
|
2018-03-16 02:10:54 +08:00
|
|
|
PrePostActionTy &Action) {
|
2018-03-20 22:45:59 +08:00
|
|
|
Action.Enter(CGF);
|
2017-12-06 22:31:09 +08:00
|
|
|
OMPPrivateScope PrivateScope(CGF);
|
|
|
|
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
|
|
|
|
(void)PrivateScope.Privatize();
|
|
|
|
CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
|
|
|
|
CodeGenDistribute);
|
|
|
|
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
|
|
|
|
};
|
|
|
|
emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen);
|
|
|
|
emitPostUpdateForReductionClause(*this, S,
|
|
|
|
[](CodeGenFunction &) { return nullptr; });
|
2017-10-04 22:12:09 +08:00
|
|
|
}
|
|
|
|
|
2017-11-21 04:46:39 +08:00
|
|
|
void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
|
|
|
|
const OMPTeamsDistributeParallelForDirective &S) {
|
|
|
|
auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
|
|
|
|
CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
|
|
|
|
S.getDistInc());
|
|
|
|
};
|
|
|
|
|
|
|
|
// Emit teams region as a standalone region.
|
|
|
|
auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
|
2018-03-16 02:10:54 +08:00
|
|
|
PrePostActionTy &Action) {
|
2018-03-20 22:45:59 +08:00
|
|
|
Action.Enter(CGF);
|
2017-11-21 04:46:39 +08:00
|
|
|
OMPPrivateScope PrivateScope(CGF);
|
|
|
|
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
|
|
|
|
(void)PrivateScope.Privatize();
|
2017-11-28 00:54:08 +08:00
|
|
|
CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
|
|
|
|
CodeGenDistribute);
|
2017-11-21 04:46:39 +08:00
|
|
|
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
|
|
|
|
};
|
|
|
|
emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
|
|
|
|
emitPostUpdateForReductionClause(*this, S,
|
|
|
|
[](CodeGenFunction &) { return nullptr; });
|
2017-12-05 04:57:19 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
|
|
|
|
const OMPTeamsDistributeParallelForSimdDirective &S) {
|
|
|
|
auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
|
|
|
|
CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
|
|
|
|
S.getDistInc());
|
|
|
|
};
|
|
|
|
|
|
|
|
// Emit teams region as a standalone region.
|
|
|
|
auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
|
2018-03-16 02:10:54 +08:00
|
|
|
PrePostActionTy &Action) {
|
2018-03-20 22:45:59 +08:00
|
|
|
Action.Enter(CGF);
|
2017-12-05 04:57:19 +08:00
|
|
|
OMPPrivateScope PrivateScope(CGF);
|
|
|
|
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
|
|
|
|
(void)PrivateScope.Privatize();
|
|
|
|
CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
|
|
|
|
CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
|
|
|
|
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
|
|
|
|
};
|
|
|
|
emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
|
|
|
|
emitPostUpdateForReductionClause(*this, S,
|
|
|
|
[](CodeGenFunction &) { return nullptr; });
|
2017-11-21 04:46:39 +08:00
|
|
|
}
|
|
|
|
|
2018-01-04 05:12:44 +08:00
|
|
|
/// Emit the region of a 'target teams distribute parallel for' directive: a
/// teams region whose body is an inlined 'distribute' directive wrapped in
/// reduction initialization and finalization.
static void emitTargetTeamsDistributeParallelForRegion(
    CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);

  // Generator for the 'distribute' part of the combined directive.
  auto DistributeGen = [&S](CodeGenFunction &InnerCGF, PrePostActionTy &) {
    InnerCGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                                   S.getDistInc());
  };

  // Generator for the teams region, which is emitted as a standalone region.
  auto TeamsGen = [&S, &DistributeGen](CodeGenFunction &InnerCGF,
                                       PrePostActionTy &InnerAction) {
    InnerAction.Enter(InnerCGF);
    CodeGenFunction::OMPPrivateScope ReductionScope(InnerCGF);
    InnerCGF.EmitOMPReductionClauseInit(S, ReductionScope);
    (void)ReductionScope.Privatize();
    CGOpenMPRuntime &RT = InnerCGF.CGM.getOpenMPRuntime();
    RT.emitInlinedDirective(InnerCGF, OMPD_distribute, DistributeGen,
                            /*HasCancel=*/false);
    InnerCGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };

  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for, TeamsGen);
  // No post-update condition is supplied: the callback always yields nullptr.
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
|
|
|
|
|
|
|
|
/// Emit the outlined device function for a 'target teams distribute parallel
/// for' directive \p S, registering it as an offload entry.
void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeParallelForDirective &S) {
  // Emit SPMD target teams distribute parallel for region as a standalone
  // region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
  };
  // Out-parameters of emitTargetOutlinedFunction. They start out null so that
  // the assertion below is well defined even if the callee returns without
  // assigning them.
  llvm::Function *Fn = nullptr;
  llvm::Constant *Addr = nullptr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}
|
|
|
|
|
|
|
|
void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
|
|
|
|
const OMPTargetTeamsDistributeParallelForDirective &S) {
|
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
|
|
|
|
emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
|
|
|
|
};
|
|
|
|
emitCommonOMPTargetDirective(*this, S, CodeGen);
|
|
|
|
}
|
|
|
|
|
2018-01-16 04:59:40 +08:00
|
|
|
/// Emit the region of a 'target teams distribute parallel for simd'
/// directive: a teams region whose body is an inlined 'distribute' directive
/// wrapped in reduction initialization and finalization.
static void emitTargetTeamsDistributeParallelForSimdRegion(
    CodeGenFunction &CGF,
    const OMPTargetTeamsDistributeParallelForSimdDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);

  // Generator for the 'distribute' part of the combined directive.
  auto DistributeGen = [&S](CodeGenFunction &InnerCGF, PrePostActionTy &) {
    InnerCGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                                   S.getDistInc());
  };

  // Generator for the teams region, which is emitted as a standalone region.
  auto TeamsGen = [&S, &DistributeGen](CodeGenFunction &InnerCGF,
                                       PrePostActionTy &InnerAction) {
    InnerAction.Enter(InnerCGF);
    CodeGenFunction::OMPPrivateScope ReductionScope(InnerCGF);
    InnerCGF.EmitOMPReductionClauseInit(S, ReductionScope);
    (void)ReductionScope.Privatize();
    CGOpenMPRuntime &RT = InnerCGF.CGM.getOpenMPRuntime();
    RT.emitInlinedDirective(InnerCGF, OMPD_distribute, DistributeGen,
                            /*HasCancel=*/false);
    InnerCGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };

  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd,
                              TeamsGen);
  // No post-update condition is supplied: the callback always yields nullptr.
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
|
|
|
|
|
|
|
|
/// Emit the outlined device function for a 'target teams distribute parallel
/// for simd' directive \p S, registering it as an offload entry.
void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
  // Emit SPMD target teams distribute parallel for simd region as a standalone
  // region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
  };
  // Out-parameters of emitTargetOutlinedFunction. They start out null so that
  // the assertion below is well defined even if the callee returns without
  // assigning them.
  llvm::Function *Fn = nullptr;
  llvm::Constant *Addr = nullptr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}
|
|
|
|
|
|
|
|
void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
|
|
|
|
const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
|
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
|
|
|
|
emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
|
|
|
|
};
|
|
|
|
emitCommonOMPTargetDirective(*this, S, CodeGen);
|
|
|
|
}
|
|
|
|
|
2015-07-01 14:57:41 +08:00
|
|
|
void CodeGenFunction::EmitOMPCancellationPointDirective(
|
|
|
|
const OMPCancellationPointDirective &S) {
|
2018-08-10 05:08:08 +08:00
|
|
|
CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getBeginLoc(),
|
2015-07-02 12:17:07 +08:00
|
|
|
S.getCancelRegion());
|
2015-07-01 14:57:41 +08:00
|
|
|
}
|
|
|
|
|
2015-07-02 19:25:17 +08:00
|
|
|
void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
|
2015-09-18 16:07:34 +08:00
|
|
|
const Expr *IfCond = nullptr;
|
|
|
|
for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
|
|
|
|
if (C->getNameModifier() == OMPD_unknown ||
|
|
|
|
C->getNameModifier() == OMPD_cancel) {
|
|
|
|
IfCond = C->getCondition();
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2018-08-10 05:08:08 +08:00
|
|
|
CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond,
|
2015-07-06 13:50:32 +08:00
|
|
|
S.getCancelRegion());
|
2015-07-02 19:25:17 +08:00
|
|
|
}
|
|
|
|
|
2015-07-03 17:56:58 +08:00
|
|
|
/// Return the jump destination used when cancelling a region of kind
/// \p Kind: the function's return block for 'parallel', 'task' and
/// 'target parallel', and the exit block of the current cancel stack for the
/// worksharing-based kinds listed in the assertion.
CodeGenFunction::JumpDest
CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
  switch (Kind) {
  case OMPD_parallel:
  case OMPD_task:
  case OMPD_target_parallel:
    return ReturnBlock;
  default:
    break;
  }
  assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
         Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
         Kind == OMPD_distribute_parallel_for ||
         Kind == OMPD_target_parallel_for ||
         Kind == OMPD_teams_distribute_parallel_for ||
         Kind == OMPD_target_teams_distribute_parallel_for);
  return OMPCancelStack.getExitBlock();
}
|
2015-07-21 21:44:28 +08:00
|
|
|
|
2016-07-28 22:23:26 +08:00
|
|
|
void CodeGenFunction::EmitOMPUseDevicePtrClause(
|
|
|
|
const OMPClause &NC, OMPPrivateScope &PrivateScope,
|
|
|
|
const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
|
|
|
|
const auto &C = cast<OMPUseDevicePtrClause>(NC);
|
|
|
|
auto OrigVarIt = C.varlist_begin();
|
|
|
|
auto InitIt = C.inits().begin();
|
2018-04-14 01:31:06 +08:00
|
|
|
for (const Expr *PvtVarIt : C.private_copies()) {
|
|
|
|
const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
|
|
|
|
const auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
|
|
|
|
const auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());
|
2016-07-28 22:23:26 +08:00
|
|
|
|
|
|
|
// In order to identify the right initializer we need to match the
|
|
|
|
// declaration used by the mapping logic. In some cases we may get
|
|
|
|
// OMPCapturedExprDecl that refers to the original declaration.
|
|
|
|
const ValueDecl *MatchingVD = OrigVD;
|
2018-04-14 01:31:06 +08:00
|
|
|
if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
|
2016-07-28 22:23:26 +08:00
|
|
|
// OMPCapturedExprDecl are used to privative fields of the current
|
|
|
|
// structure.
|
2018-04-14 01:31:06 +08:00
|
|
|
const auto *ME = cast<MemberExpr>(OED->getInit());
|
2016-07-28 22:23:26 +08:00
|
|
|
assert(isa<CXXThisExpr>(ME->getBase()) &&
|
|
|
|
"Base should be the current struct!");
|
|
|
|
MatchingVD = ME->getMemberDecl();
|
|
|
|
}
|
|
|
|
|
|
|
|
// If we don't have information about the current list item, move on to
|
|
|
|
// the next one.
|
|
|
|
auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
|
|
|
|
if (InitAddrIt == CaptureDeviceAddrMap.end())
|
|
|
|
continue;
|
|
|
|
|
2018-04-14 01:31:06 +08:00
|
|
|
bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, OrigVD,
|
|
|
|
InitAddrIt, InitVD,
|
|
|
|
PvtVD]() {
|
2016-07-28 22:23:26 +08:00
|
|
|
// Initialize the temporary initialization variable with the address we
|
|
|
|
// get from the runtime library. We have to cast the source address
|
|
|
|
// because it is always a void *. References are materialized in the
|
|
|
|
// privatization scope, so the initialization here disregards the fact
|
|
|
|
// the original variable is a reference.
|
|
|
|
QualType AddrQTy =
|
|
|
|
getContext().getPointerType(OrigVD->getType().getNonReferenceType());
|
|
|
|
llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
|
|
|
|
Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
|
|
|
|
setAddrOfLocalVar(InitVD, InitAddr);
|
|
|
|
|
|
|
|
// Emit private declaration, it will be initialized by the value we
|
|
|
|
// declaration we just added to the local declarations map.
|
|
|
|
EmitDecl(*PvtVD);
|
|
|
|
|
|
|
|
// The initialization variables reached its purpose in the emission
|
2018-04-06 23:14:32 +08:00
|
|
|
// of the previous declaration, so we don't need it anymore.
|
2016-07-28 22:23:26 +08:00
|
|
|
LocalDeclMap.erase(InitVD);
|
|
|
|
|
|
|
|
// Return the address of the private variable.
|
|
|
|
return GetAddrOfLocalVar(PvtVD);
|
|
|
|
});
|
|
|
|
assert(IsRegistered && "firstprivate var already registered as private");
|
|
|
|
// Silence the warning about unused variable.
|
|
|
|
(void)IsRegistered;
|
|
|
|
|
|
|
|
++OrigVarIt;
|
|
|
|
++InitIt;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-07-21 21:44:28 +08:00
|
|
|
// Generate the instructions for '#pragma omp target data' directive.
|
|
|
|
void CodeGenFunction::EmitOMPTargetDataDirective(
|
|
|
|
const OMPTargetDataDirective &S) {
|
2016-07-28 22:23:26 +08:00
|
|
|
CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);
|
|
|
|
|
|
|
|
// Create a pre/post action to signal the privatization of the device pointer.
|
|
|
|
// This action can be replaced by the OpenMP runtime code generation to
|
|
|
|
// deactivate privatization.
|
|
|
|
bool PrivatizeDevicePointers = false;
|
|
|
|
class DevicePointerPrivActionTy : public PrePostActionTy {
|
|
|
|
bool &PrivatizeDevicePointers;
|
|
|
|
|
|
|
|
public:
|
|
|
|
explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
|
|
|
|
: PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
|
|
|
|
void Enter(CodeGenFunction &CGF) override {
|
|
|
|
PrivatizeDevicePointers = true;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);
|
|
|
|
|
|
|
|
auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
|
2018-01-13 03:39:11 +08:00
|
|
|
CodeGenFunction &CGF, PrePostActionTy &Action) {
|
2016-07-28 22:23:26 +08:00
|
|
|
auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
|
2018-01-13 03:39:11 +08:00
|
|
|
CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
|
2016-07-28 22:23:26 +08:00
|
|
|
};
|
|
|
|
|
2018-04-06 23:14:32 +08:00
|
|
|
// Codegen that selects whether to generate the privatization code or not.
|
2016-07-28 22:23:26 +08:00
|
|
|
auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
|
|
|
|
&InnermostCodeGen](CodeGenFunction &CGF,
|
|
|
|
PrePostActionTy &Action) {
|
|
|
|
RegionCodeGenTy RCG(InnermostCodeGen);
|
|
|
|
PrivatizeDevicePointers = false;
|
|
|
|
|
|
|
|
// Call the pre-action to change the status of PrivatizeDevicePointers if
|
|
|
|
// needed.
|
|
|
|
Action.Enter(CGF);
|
|
|
|
|
|
|
|
if (PrivatizeDevicePointers) {
|
|
|
|
OMPPrivateScope PrivateScope(CGF);
|
|
|
|
// Emit all instances of the use_device_ptr clause.
|
|
|
|
for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
|
|
|
|
CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
|
|
|
|
Info.CaptureDeviceAddrMap);
|
|
|
|
(void)PrivateScope.Privatize();
|
|
|
|
RCG(CGF);
|
2018-04-14 01:31:06 +08:00
|
|
|
} else {
|
2016-07-28 22:23:26 +08:00
|
|
|
RCG(CGF);
|
2018-04-14 01:31:06 +08:00
|
|
|
}
|
2016-07-28 22:23:26 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
// Forward the provided action to the privatization codegen.
|
|
|
|
RegionCodeGenTy PrivRCG(PrivCodeGen);
|
|
|
|
PrivRCG.setAction(Action);
|
|
|
|
|
|
|
|
// Notwithstanding the body of the region is emitted as inlined directive,
|
|
|
|
// we don't use an inline scope as changes in the references inside the
|
|
|
|
// region are expected to be visible outside, so we do not privative them.
|
|
|
|
OMPLexicalScope Scope(CGF, S);
|
|
|
|
CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
|
|
|
|
PrivRCG);
|
2016-04-28 06:58:19 +08:00
|
|
|
};
|
|
|
|
|
2016-07-28 22:23:26 +08:00
|
|
|
RegionCodeGenTy RCG(CodeGen);
|
|
|
|
|
2016-04-28 06:58:19 +08:00
|
|
|
// If we don't have target devices, don't bother emitting the data mapping
|
|
|
|
// code.
|
|
|
|
if (CGM.getLangOpts().OMPTargetTriples.empty()) {
|
2016-07-28 22:23:26 +08:00
|
|
|
RCG(*this);
|
2016-04-28 06:58:19 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check if we have any if clause associated with the directive.
|
|
|
|
const Expr *IfCond = nullptr;
|
2018-04-14 01:31:06 +08:00
|
|
|
if (const auto *C = S.getSingleClause<OMPIfClause>())
|
2016-04-28 06:58:19 +08:00
|
|
|
IfCond = C->getCondition();
|
|
|
|
|
|
|
|
// Check if we have any device clause associated with the directive.
|
|
|
|
const Expr *Device = nullptr;
|
2018-04-14 01:31:06 +08:00
|
|
|
if (const auto *C = S.getSingleClause<OMPDeviceClause>())
|
2016-04-28 06:58:19 +08:00
|
|
|
Device = C->getDevice();
|
|
|
|
|
2016-07-28 22:23:26 +08:00
|
|
|
// Set the action to signal privatization of device pointers.
|
|
|
|
RCG.setAction(PrivAction);
|
|
|
|
|
|
|
|
// Emit region code.
|
|
|
|
CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
|
|
|
|
Info);
|
2015-07-21 21:44:28 +08:00
|
|
|
}
|
2015-12-01 12:18:41 +08:00
|
|
|
|
2016-01-20 03:15:56 +08:00
|
|
|
void CodeGenFunction::EmitOMPTargetEnterDataDirective(
|
|
|
|
const OMPTargetEnterDataDirective &S) {
|
2016-04-28 07:07:29 +08:00
|
|
|
// If we don't have target devices, don't bother emitting the data mapping
|
|
|
|
// code.
|
|
|
|
if (CGM.getLangOpts().OMPTargetTriples.empty())
|
|
|
|
return;
|
|
|
|
|
|
|
|
// Check if we have any if clause associated with the directive.
|
|
|
|
const Expr *IfCond = nullptr;
|
2018-04-14 01:31:06 +08:00
|
|
|
if (const auto *C = S.getSingleClause<OMPIfClause>())
|
2016-04-28 07:07:29 +08:00
|
|
|
IfCond = C->getCondition();
|
|
|
|
|
|
|
|
// Check if we have any device clause associated with the directive.
|
|
|
|
const Expr *Device = nullptr;
|
2018-04-14 01:31:06 +08:00
|
|
|
if (const auto *C = S.getSingleClause<OMPDeviceClause>())
|
2016-04-28 07:07:29 +08:00
|
|
|
Device = C->getDevice();
|
|
|
|
|
2018-01-13 03:39:11 +08:00
|
|
|
OMPLexicalScope Scope(*this, S, OMPD_task);
|
2017-12-28 01:58:32 +08:00
|
|
|
CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
|
2016-01-20 03:15:56 +08:00
|
|
|
}
|
|
|
|
|
2016-01-20 04:04:50 +08:00
|
|
|
void CodeGenFunction::EmitOMPTargetExitDataDirective(
|
|
|
|
const OMPTargetExitDataDirective &S) {
|
2016-04-28 07:14:30 +08:00
|
|
|
// If we don't have target devices, don't bother emitting the data mapping
|
|
|
|
// code.
|
|
|
|
if (CGM.getLangOpts().OMPTargetTriples.empty())
|
|
|
|
return;
|
|
|
|
|
|
|
|
// Check if we have any if clause associated with the directive.
|
|
|
|
const Expr *IfCond = nullptr;
|
2018-04-14 01:31:06 +08:00
|
|
|
if (const auto *C = S.getSingleClause<OMPIfClause>())
|
2016-04-28 07:14:30 +08:00
|
|
|
IfCond = C->getCondition();
|
|
|
|
|
|
|
|
// Check if we have any device clause associated with the directive.
|
|
|
|
const Expr *Device = nullptr;
|
2018-04-14 01:31:06 +08:00
|
|
|
if (const auto *C = S.getSingleClause<OMPDeviceClause>())
|
2016-04-28 07:14:30 +08:00
|
|
|
Device = C->getDevice();
|
|
|
|
|
2018-01-13 03:39:11 +08:00
|
|
|
OMPLexicalScope Scope(*this, S, OMPD_task);
|
2017-12-28 01:58:32 +08:00
|
|
|
CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
|
2016-01-20 04:04:50 +08:00
|
|
|
}
|
|
|
|
|
2017-01-19 02:18:53 +08:00
|
|
|
/// Emit the region of a 'target parallel' directive: a parallel region whose
/// body sets up the firstprivate, private and reduction clauses, emits the
/// statement captured for 'parallel', and finalizes the reductions.
static void emitTargetParallelRegion(CodeGenFunction &CGF,
                                     const OMPTargetParallelDirective &S,
                                     PrePostActionTy &Action) {
  // Get the captured statement associated with the 'parallel' region.
  const CapturedStmt *ParallelCS = S.getCapturedStmt(OMPD_parallel);
  Action.Enter(CGF);

  auto ParallelGen = [&S, ParallelCS](CodeGenFunction &InnerCGF,
                                      PrePostActionTy &InnerAction) {
    InnerAction.Enter(InnerCGF);
    CodeGenFunction::OMPPrivateScope ClauseScope(InnerCGF);
    (void)InnerCGF.EmitOMPFirstprivateClause(S, ClauseScope);
    InnerCGF.EmitOMPPrivateClause(S, ClauseScope);
    InnerCGF.EmitOMPReductionClauseInit(S, ClauseScope);
    (void)ClauseScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) {
      CGOpenMPRuntime &RT = InnerCGF.CGM.getOpenMPRuntime();
      RT.adjustTargetSpecificDataForLambdas(InnerCGF, S);
    }
    // TODO: Add support for clauses.
    InnerCGF.EmitStmt(ParallelCS->getCapturedStmt());
    InnerCGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };

  emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, ParallelGen,
                                 emitEmptyBoundParameters);
  // No post-update condition is supplied: the callback always yields nullptr.
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
|
|
|
|
|
|
|
|
/// Emit the outlined device function for a 'target parallel' directive \p S,
/// registering it as an offload entry.
void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  // Out-parameters of emitTargetOutlinedFunction. They start out null so that
  // the assertion below is well defined even if the callee returns without
  // assigning them.
  llvm::Function *Fn = nullptr;
  llvm::Constant *Addr = nullptr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}
|
|
|
|
|
2016-01-27 02:48:41 +08:00
|
|
|
void CodeGenFunction::EmitOMPTargetParallelDirective(
|
|
|
|
const OMPTargetParallelDirective &S) {
|
2017-01-19 02:18:53 +08:00
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
|
|
|
|
emitTargetParallelRegion(CGF, S, Action);
|
|
|
|
};
|
|
|
|
emitCommonOMPTargetDirective(*this, S, CodeGen);
|
2016-01-27 02:48:41 +08:00
|
|
|
}
|
|
|
|
|
2017-11-09 04:16:14 +08:00
|
|
|
/// Emit the region of a 'target parallel for' directive as a parallel region
/// whose body is a worksharing loop.
static void emitTargetParallelForRegion(CodeGenFunction &CGF,
                                        const OMPTargetParallelForDirective &S,
                                        PrePostActionTy &Action) {
  Action.Enter(CGF);
  // The directive is treated as a combination of two implicit directives:
  // 'parallel' with a nested 'for'.
  auto LoopGen = [&S](CodeGenFunction &InnerCGF,
                      PrePostActionTy &InnerAction) {
    InnerAction.Enter(InnerCGF);
    // The cancel-stack entry for this region lives until the end of the
    // lambda, i.e. for the whole worksharing-loop emission.
    CodeGenFunction::OMPCancelStackRAII CancelRegion(
        InnerCGF, OMPD_target_parallel_for, S.hasCancel());
    InnerCGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                    emitForLoopBounds,
                                    emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_for, LoopGen,
                                 emitEmptyBoundParameters);
}
|
|
|
|
|
|
|
|
/// Emit the outlined device function for a 'target parallel for' directive
/// \p S, registering it as an offload entry.
void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelForDirective &S) {
  // Emit SPMD target parallel for region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForRegion(CGF, S, Action);
  };
  // Out-parameters of emitTargetOutlinedFunction. They start out null so that
  // the assertion below is well defined even if the callee returns without
  // assigning them.
  llvm::Function *Fn = nullptr;
  llvm::Constant *Addr = nullptr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}
|
|
|
|
|
2016-02-03 23:46:42 +08:00
|
|
|
void CodeGenFunction::EmitOMPTargetParallelForDirective(
|
|
|
|
const OMPTargetParallelForDirective &S) {
|
2017-11-09 04:16:14 +08:00
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
|
|
|
|
emitTargetParallelForRegion(CGF, S, Action);
|
|
|
|
};
|
|
|
|
emitCommonOMPTargetDirective(*this, S, CodeGen);
|
2016-02-03 23:46:42 +08:00
|
|
|
}
|
|
|
|
|
2017-11-10 01:32:15 +08:00
|
|
|
static void
|
|
|
|
emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
|
|
|
|
const OMPTargetParallelForSimdDirective &S,
|
|
|
|
PrePostActionTy &Action) {
|
|
|
|
Action.Enter(CGF);
|
|
|
|
// Emit directive as a combined directive that consists of two implicit
|
|
|
|
// directives: 'parallel' with 'for' directive.
|
2018-03-16 02:10:54 +08:00
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
|
|
|
|
Action.Enter(CGF);
|
2017-11-10 01:32:15 +08:00
|
|
|
CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
|
|
|
|
emitDispatchForLoopBounds);
|
|
|
|
};
|
|
|
|
emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
|
|
|
|
emitEmptyBoundParameters);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Emit the outlined device function for a 'target parallel for simd'
/// directive \p S, registering it as an offload entry.
void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelForSimdDirective &S) {
  // Emit SPMD target parallel for region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForSimdRegion(CGF, S, Action);
  };
  // Out-parameters of emitTargetOutlinedFunction. They start out null so that
  // the assertion below is well defined even if the callee returns without
  // assigning them.
  llvm::Function *Fn = nullptr;
  llvm::Constant *Addr = nullptr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}
|
|
|
|
|
|
|
|
void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
|
|
|
|
const OMPTargetParallelForSimdDirective &S) {
|
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
|
|
|
|
emitTargetParallelForSimdRegion(CGF, S, Action);
|
|
|
|
};
|
|
|
|
emitCommonOMPTargetDirective(*this, S, CodeGen);
|
|
|
|
}
|
|
|
|
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
/// Emit a helper variable and return corresponding lvalue.
|
|
|
|
static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
|
|
|
|
const ImplicitParamDecl *PVD,
|
|
|
|
CodeGenFunction::OMPPrivateScope &Privates) {
|
2018-04-14 01:31:06 +08:00
|
|
|
const auto *VDecl = cast<VarDecl>(Helper->getDecl());
|
|
|
|
Privates.addPrivate(VDecl,
|
|
|
|
[&CGF, PVD]() { return CGF.GetAddrOfLocalVar(PVD); });
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
|
|
|
|
assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
|
|
|
|
// Emit outlined function for task construct.
|
2018-01-13 03:39:11 +08:00
|
|
|
const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
|
2018-04-14 01:31:06 +08:00
|
|
|
Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
|
|
|
|
QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
const Expr *IfCond = nullptr;
|
|
|
|
for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
|
|
|
|
if (C->getNameModifier() == OMPD_unknown ||
|
|
|
|
C->getNameModifier() == OMPD_taskloop) {
|
|
|
|
IfCond = C->getCondition();
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2016-04-28 17:23:51 +08:00
|
|
|
|
|
|
|
OMPTaskDataTy Data;
|
|
|
|
// Check if taskloop must be emitted without taskgroup.
|
|
|
|
Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
// TODO: Check if we should emit tied or untied task.
|
2016-04-28 17:23:51 +08:00
|
|
|
Data.Tied = true;
|
|
|
|
// Set scheduling for taskloop
|
[OPENMP 4.5] Codegen for 'grainsize/num_tasks' clauses of 'taskloop'
directive.
OpenMP 4.5 defines 'taskloop' directive and 2 additional clauses
'grainsize' and 'num_tasks' for this directive. Patch adds codegen for
these clauses.
These clauses are generated as arguments of the '__kmpc_taskloop'
libcall and are encoded the following way:
void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int sched, kmp_uint64 grainsize, void *task_dup);
If 'grainsize' is specified, 'sched' argument must be set to '1' and
'grainsize' argument must be set to the value of the 'grainsize' clause.
If 'num_tasks' is specified, 'sched' argument must be set to '2' and
'grainsize' argument must be set to the value of the 'num_tasks' clause.
It is possible because these 2 clauses are mutually exclusive and can't
be used at the same time on the same directive.
If none of these clauses is specified, 'sched' argument must be set to
'0'.
llvm-svn: 267862
2016-04-28 17:15:06 +08:00
|
|
|
if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) {
|
|
|
|
// grainsize clause
|
2016-04-28 17:23:51 +08:00
|
|
|
Data.Schedule.setInt(/*IntVal=*/false);
|
|
|
|
Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
|
[OPENMP 4.5] Codegen for 'grainsize/num_tasks' clauses of 'taskloop'
directive.
OpenMP 4.5 defines 'taskloop' directive and 2 additional clauses
'grainsize' and 'num_tasks' for this directive. Patch adds codegen for
these clauses.
These clauses are generated as arguments of the '__kmpc_taskloop'
libcall and are encoded the following way:
void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int sched, kmp_uint64 grainsize, void *task_dup);
If 'grainsize' is specified, 'sched' argument must be set to '1' and
'grainsize' argument must be set to the value of the 'grainsize' clause.
If 'num_tasks' is specified, 'sched' argument must be set to '2' and
'grainsize' argument must be set to the value of the 'num_tasks' clause.
It is possible because these 2 clauses are mutually exclusive and can't
be used at the same time on the same directive.
If none of these clauses is specified, 'sched' argument must be set to
'0'.
llvm-svn: 267862
2016-04-28 17:15:06 +08:00
|
|
|
} else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) {
|
|
|
|
// num_tasks clause
|
2016-04-28 17:23:51 +08:00
|
|
|
Data.Schedule.setInt(/*IntVal=*/true);
|
|
|
|
Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
|
[OPENMP 4.5] Codegen for 'grainsize/num_tasks' clauses of 'taskloop'
directive.
OpenMP 4.5 defines 'taskloop' directive and 2 additional clauses
'grainsize' and 'num_tasks' for this directive. Patch adds codegen for
these clauses.
These clauses are generated as arguments of the '__kmpc_taskloop'
libcall and are encoded the following way:
void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int sched, kmp_uint64 grainsize, void *task_dup);
If 'grainsize' is specified, 'sched' argument must be set to '1' and
'grainsize' argument must be set to the value of the 'grainsize' clause.
If 'num_tasks' is specified, 'sched' argument must be set to '2' and
'grainsize' argument must be set to the value of the 'num_tasks' clause.
It is possible because these 2 clauses are mutually exclusive and can't
be used at the same time on the same directive.
If none of these clauses is specified, 'sched' argument must be set to
'0'.
llvm-svn: 267862
2016-04-28 17:15:06 +08:00
|
|
|
}
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
|
|
|
|
auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
|
|
|
|
// if (PreCond) {
|
|
|
|
// for (IV in 0..LastIteration) BODY;
|
|
|
|
// <Final counter/linear vars updates>;
|
|
|
|
// }
|
|
|
|
//
|
|
|
|
|
|
|
|
// Emit: if (PreCond) - begin.
|
|
|
|
// If the condition constant folds and can be elided, avoid emitting the
|
|
|
|
// whole loop.
|
|
|
|
bool CondConstant;
|
|
|
|
llvm::BasicBlock *ContBlock = nullptr;
|
|
|
|
OMPLoopScope PreInitScope(CGF, S);
|
|
|
|
if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
|
|
|
|
if (!CondConstant)
|
|
|
|
return;
|
|
|
|
} else {
|
2018-04-14 01:31:06 +08:00
|
|
|
llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
ContBlock = CGF.createBasicBlock("taskloop.if.end");
|
|
|
|
emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
|
|
|
|
CGF.getProfileCount(&S));
|
|
|
|
CGF.EmitBlock(ThenBlock);
|
|
|
|
CGF.incrementProfileCounter(&S);
|
|
|
|
}
|
|
|
|
|
2016-04-28 20:14:51 +08:00
|
|
|
if (isOpenMPSimdDirective(S.getDirectiveKind()))
|
|
|
|
CGF.EmitOMPSimdInit(S);
|
|
|
|
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
OMPPrivateScope LoopScope(CGF);
|
|
|
|
// Emit helper vars inits.
|
|
|
|
enum { LowerBound = 5, UpperBound, Stride, LastIter };
|
|
|
|
auto *I = CS->getCapturedDecl()->param_begin();
|
|
|
|
auto *LBP = std::next(I, LowerBound);
|
|
|
|
auto *UBP = std::next(I, UpperBound);
|
|
|
|
auto *STP = std::next(I, Stride);
|
|
|
|
auto *LIP = std::next(I, LastIter);
|
|
|
|
mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
|
|
|
|
LoopScope);
|
|
|
|
mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
|
|
|
|
LoopScope);
|
|
|
|
mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
|
|
|
|
mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
|
|
|
|
LoopScope);
|
|
|
|
CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
|
2016-05-05 16:46:22 +08:00
|
|
|
bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
(void)LoopScope.Privatize();
|
|
|
|
// Emit the loop iteration variable.
|
|
|
|
const Expr *IVExpr = S.getIterationVariable();
|
2018-04-14 01:31:06 +08:00
|
|
|
const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
CGF.EmitVarDecl(*IVDecl);
|
|
|
|
CGF.EmitIgnoredExpr(S.getInit());
|
|
|
|
|
|
|
|
// Emit the iterations count variable.
|
|
|
|
// If it is not a variable, Sema decided to calculate iterations count on
|
|
|
|
// each iteration (e.g., it is foldable into a constant).
|
2018-04-14 01:31:06 +08:00
|
|
|
if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
|
|
|
|
// Emit calculation of the iterations count.
|
|
|
|
CGF.EmitIgnoredExpr(S.getCalcLastIteration());
|
|
|
|
}
|
|
|
|
|
|
|
|
CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
|
|
|
|
S.getInc(),
|
|
|
|
[&S](CodeGenFunction &CGF) {
|
|
|
|
CGF.EmitOMPLoopBody(S, JumpDest());
|
|
|
|
CGF.EmitStopPoint(&S);
|
|
|
|
},
|
|
|
|
[](CodeGenFunction &) {});
|
|
|
|
// Emit: if (PreCond) - end.
|
|
|
|
if (ContBlock) {
|
|
|
|
CGF.EmitBranch(ContBlock);
|
|
|
|
CGF.EmitBlock(ContBlock, true);
|
|
|
|
}
|
2016-05-05 16:46:22 +08:00
|
|
|
// Emit final copy of the lastprivate variables if IsLastIter != 0.
|
|
|
|
if (HasLastprivateClause) {
|
|
|
|
CGF.EmitOMPLastprivateClauseFinal(
|
|
|
|
S, isOpenMPSimdDirective(S.getDirectiveKind()),
|
|
|
|
CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
|
|
|
|
CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
|
2018-08-10 05:08:08 +08:00
|
|
|
(*LIP)->getType(), S.getBeginLoc())));
|
2016-05-05 16:46:22 +08:00
|
|
|
}
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
};
|
2016-04-28 17:23:51 +08:00
|
|
|
auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
|
2019-02-06 00:42:33 +08:00
|
|
|
IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
|
2016-04-28 17:23:51 +08:00
|
|
|
const OMPTaskDataTy &Data) {
|
2018-04-14 01:31:06 +08:00
|
|
|
auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
|
|
|
|
&Data](CodeGenFunction &CGF, PrePostActionTy &) {
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
OMPLoopScope PreInitScope(CGF, S);
|
2018-08-10 05:08:08 +08:00
|
|
|
CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getBeginLoc(), S,
|
2016-04-28 17:23:51 +08:00
|
|
|
OutlinedFn, SharedsTy,
|
|
|
|
CapturedStruct, IfCond, Data);
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
};
|
|
|
|
CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
|
|
|
|
CodeGen);
|
|
|
|
};
|
2018-01-13 03:39:11 +08:00
|
|
|
if (Data.Nogroup) {
|
|
|
|
EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data);
|
|
|
|
} else {
|
2017-07-13 02:09:32 +08:00
|
|
|
CGM.getOpenMPRuntime().emitTaskgroupRegion(
|
|
|
|
*this,
|
|
|
|
[&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
|
|
|
|
PrePostActionTy &Action) {
|
|
|
|
Action.Enter(CGF);
|
2018-01-13 03:39:11 +08:00
|
|
|
CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen,
|
|
|
|
Data);
|
2017-07-13 02:09:32 +08:00
|
|
|
},
|
2018-08-10 05:08:08 +08:00
|
|
|
S.getBeginLoc());
|
2017-07-13 02:09:32 +08:00
|
|
|
}
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
}
|
|
|
|
|
2015-12-01 12:18:41 +08:00
|
|
|
void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
|
[OPENMP 4.5] Codegen for 'taskloop' directive.
The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
llvm-svn: 267395
2016-04-25 20:22:29 +08:00
|
|
|
EmitOMPTaskLoopBasedDirective(S);
|
2015-12-01 12:18:41 +08:00
|
|
|
}
|
|
|
|
|
2015-12-03 17:40:15 +08:00
|
|
|
void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
|
|
|
|
const OMPTaskLoopSimdDirective &S) {
|
2016-04-28 20:14:51 +08:00
|
|
|
EmitOMPTaskLoopBasedDirective(S);
|
2015-12-03 17:40:15 +08:00
|
|
|
}
|
2016-05-27 01:30:50 +08:00
|
|
|
|
|
|
|
// Generate the instructions for '#pragma omp target update' directive.
|
|
|
|
void CodeGenFunction::EmitOMPTargetUpdateDirective(
|
|
|
|
const OMPTargetUpdateDirective &S) {
|
2016-05-27 02:30:22 +08:00
|
|
|
// If we don't have target devices, don't bother emitting the data mapping
|
|
|
|
// code.
|
|
|
|
if (CGM.getLangOpts().OMPTargetTriples.empty())
|
|
|
|
return;
|
|
|
|
|
|
|
|
// Check if we have any if clause associated with the directive.
|
|
|
|
const Expr *IfCond = nullptr;
|
2018-04-14 01:31:06 +08:00
|
|
|
if (const auto *C = S.getSingleClause<OMPIfClause>())
|
2016-05-27 02:30:22 +08:00
|
|
|
IfCond = C->getCondition();
|
|
|
|
|
|
|
|
// Check if we have any device clause associated with the directive.
|
|
|
|
const Expr *Device = nullptr;
|
2018-04-14 01:31:06 +08:00
|
|
|
if (const auto *C = S.getSingleClause<OMPDeviceClause>())
|
2016-05-27 02:30:22 +08:00
|
|
|
Device = C->getDevice();
|
|
|
|
|
2018-01-13 03:39:11 +08:00
|
|
|
OMPLexicalScope Scope(*this, S, OMPD_task);
|
2017-12-28 01:58:32 +08:00
|
|
|
CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
|
2016-05-27 01:30:50 +08:00
|
|
|
}
|
2017-12-30 02:07:07 +08:00
|
|
|
|
|
|
|
void CodeGenFunction::EmitSimpleOMPExecutableDirective(
|
|
|
|
const OMPExecutableDirective &D) {
|
|
|
|
if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
|
|
|
|
return;
|
|
|
|
auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
|
|
|
|
if (isOpenMPSimdDirective(D.getDirectiveKind())) {
|
|
|
|
emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
|
|
|
|
} else {
|
2018-10-29 23:01:58 +08:00
|
|
|
OMPPrivateScope LoopGlobals(CGF);
|
2017-12-30 02:07:07 +08:00
|
|
|
if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
|
2018-04-14 01:31:06 +08:00
|
|
|
for (const Expr *E : LD->counters()) {
|
2018-10-29 23:01:58 +08:00
|
|
|
const auto *VD = dyn_cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
|
|
|
|
if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) {
|
|
|
|
LValue GlobLVal = CGF.EmitLValue(E);
|
|
|
|
LoopGlobals.addPrivate(
|
|
|
|
VD, [&GlobLVal]() { return GlobLVal.getAddress(); });
|
|
|
|
}
|
2018-10-30 16:49:26 +08:00
|
|
|
if (isa<OMPCapturedExprDecl>(VD)) {
|
2017-12-30 02:07:07 +08:00
|
|
|
// Emit only those that were not explicitly referenced in clauses.
|
|
|
|
if (!CGF.LocalDeclMap.count(VD))
|
|
|
|
CGF.EmitVarDecl(*VD);
|
|
|
|
}
|
|
|
|
}
|
2018-08-14 03:04:24 +08:00
|
|
|
for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
|
|
|
|
if (!C->getNumForLoops())
|
|
|
|
continue;
|
|
|
|
for (unsigned I = LD->getCollapsedNumber(),
|
|
|
|
E = C->getLoopNumIterations().size();
|
|
|
|
I < E; ++I) {
|
|
|
|
if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
|
2018-09-21 01:19:41 +08:00
|
|
|
cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) {
|
2018-08-14 03:04:24 +08:00
|
|
|
// Emit only those that were not explicitly referenced in clauses.
|
|
|
|
if (!CGF.LocalDeclMap.count(VD))
|
|
|
|
CGF.EmitVarDecl(*VD);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2017-12-30 02:07:07 +08:00
|
|
|
}
|
2018-10-29 23:01:58 +08:00
|
|
|
LoopGlobals.Privatize();
|
2018-01-13 03:39:11 +08:00
|
|
|
CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
|
2017-12-30 02:07:07 +08:00
|
|
|
}
|
|
|
|
};
|
|
|
|
OMPSimdLexicalScope Scope(*this, D);
|
|
|
|
CGM.getOpenMPRuntime().emitInlinedDirective(
|
|
|
|
*this,
|
|
|
|
isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
|
|
|
|
: D.getDirectiveKind(),
|
|
|
|
CodeGen);
|
|
|
|
}
|