2014-05-06 18:08:46 +08:00
|
|
|
//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
|
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// This contains code to emit OpenMP nodes as LLVM code.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "CGOpenMPRuntime.h"
|
|
|
|
#include "CodeGenFunction.h"
|
|
|
|
#include "CodeGenModule.h"
|
2015-01-14 19:29:14 +08:00
|
|
|
#include "TargetInfo.h"
|
2014-05-06 18:08:46 +08:00
|
|
|
#include "clang/AST/Stmt.h"
|
|
|
|
#include "clang/AST/StmtOpenMP.h"
|
|
|
|
using namespace clang;
|
|
|
|
using namespace CodeGen;
|
|
|
|
|
2016-01-26 20:20:39 +08:00
|
|
|
/// Produce the size of \p Ty, in chars, as an llvm::Value.
///
/// When ASTContext reports a non-zero size the result is simply the constant
/// obtained from CGM.getSize(). Otherwise the type is peeled one variable
/// array dimension at a time, multiplying the dimension sizes together, and
/// the product is finally scaled by the size of the remaining element type.
/// A type whose remaining element size is still zero yields a constant 0 of
/// type SizeTy.
llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  ASTContext &Ctx = getContext();

  // Fast path: the size is statically known and non-zero.
  auto StaticSize = Ctx.getTypeSizeInChars(Ty);
  if (!StaticSize.isZero())
    return CGM.getSize(StaticSize);

  // getTypeSizeInChars() returns 0 for a VLA, so strip every variable array
  // level and accumulate the product of the dimension sizes. getVLASize()
  // hands back the dimension size together with the next type to inspect.
  llvm::Value *ElementCount = nullptr;
  while (auto *VAT = Ctx.getAsVariableArrayType(Ty)) {
    llvm::Value *DimSize;
    std::tie(DimSize, Ty) = getVLASize(VAT);
    ElementCount =
        ElementCount ? Builder.CreateNUWMul(ElementCount, DimSize) : DimSize;
  }

  // Ty now names whatever is left after peeling the variable array levels.
  auto ElementSize = Ctx.getTypeSizeInChars(Ty);
  if (ElementSize.isZero())
    return llvm::ConstantInt::get(SizeTy, /*V=*/0);

  return Builder.CreateNUWMul(ElementCount, CGM.getSize(ElementSize));
}
|
|
|
|
|
2015-09-10 16:12:02 +08:00
|
|
|
void CodeGenFunction::GenerateOpenMPCapturedVars(
|
2015-12-03 01:44:43 +08:00
|
|
|
const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
|
2015-09-10 16:12:02 +08:00
|
|
|
const RecordDecl *RD = S.getCapturedRecordDecl();
|
|
|
|
auto CurField = RD->field_begin();
|
|
|
|
auto CurCap = S.captures().begin();
|
|
|
|
for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
|
|
|
|
E = S.capture_init_end();
|
|
|
|
I != E; ++I, ++CurField, ++CurCap) {
|
|
|
|
if (CurField->hasCapturedVLAType()) {
|
|
|
|
auto VAT = CurField->getCapturedVLAType();
|
2015-10-03 00:14:20 +08:00
|
|
|
auto *Val = VLASizeMap[VAT->getSizeExpr()];
|
|
|
|
CapturedVars.push_back(Val);
|
2015-09-10 16:12:02 +08:00
|
|
|
} else if (CurCap->capturesThis())
|
|
|
|
CapturedVars.push_back(CXXThisValue);
|
2015-12-03 01:44:43 +08:00
|
|
|
else if (CurCap->capturesVariableByCopy())
|
|
|
|
CapturedVars.push_back(
|
|
|
|
EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal());
|
|
|
|
else {
|
|
|
|
assert(CurCap->capturesVariable() && "Expected capture by reference.");
|
2015-09-10 16:12:02 +08:00
|
|
|
CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
|
2015-12-03 01:44:43 +08:00
|
|
|
}
|
2015-09-10 16:12:02 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-12-03 01:44:43 +08:00
|
|
|
/// Reinterpret the storage described by \p AddrLV as holding a \p DstType.
///
/// The pointer of \p AddrLV is converted, via EmitScalarConversion, from the
/// uintptr type to 'pointer to DstType', and a naturally aligned address is
/// formed from the result.
///
/// \param CGF             Function being emitted.
/// \param DstType         Type the storage should be viewed as.
/// \param Name            Base name for the temporary created in the
///                        reference case (suffixed with ".ref").
/// \param AddrLV          LValue whose address is to be converted.
/// \param isReferenceType When true, the converted pointer is stored into a
///                        fresh temporary of type 'DstType &' and the address
///                        of that temporary is returned instead.
/// \returns The converted address, or the address of the reference temporary.
static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
                                    StringRef Name, LValue AddrLV,
                                    bool isReferenceType = false) {
  ASTContext &Ctx = CGF.getContext();
  QualType DstPtrType = Ctx.getPointerType(DstType);

  llvm::Value *TypedPtr =
      CGF.EmitScalarConversion(AddrLV.getAddress().getPointer(),
                               Ctx.getUIntPtrType(), DstPtrType,
                               SourceLocation());
  Address ValueAddr =
      CGF.MakeNaturalAlignAddrLValue(TypedPtr, DstPtrType).getAddress();

  if (!isReferenceType)
    return ValueAddr;

  // For references the caller needs the address of the reference itself, so
  // materialize one: a temporary of reference type initialized with the
  // pointer computed above.
  QualType RefType = Ctx.getLValueReferenceType(DstType);
  llvm::Value *Referent = ValueAddr.getPointer();
  Address RefSlot = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
  LValue RefSlotLV = CGF.MakeAddrLValue(RefSlot, RefType);
  CGF.EmitScalarInit(Referent, RefSlotLV);
  return RefSlot;
}
|
|
|
|
|
2015-09-10 16:12:02 +08:00
|
|
|
/// Emit the outlined function for the captured statement \p S.
///
/// Unlike a function taking a single context record, the function built here
/// receives one parameter per captured field, placed where the CapturedDecl's
/// context parameter would have been; the CapturedDecl's other parameters
/// keep their relative positions before and after that group.
///
/// Requires CapturedStmtInfo to be set; it supplies the helper name and emits
/// the body.
///
/// \returns The newly created internal-linkage function.
llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  const RecordDecl *RD = S.getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  ASTContext &Ctx = CGM.getContext();
  const unsigned ContextParamPos = CD->getContextParamPosition();

  // --- Argument list ------------------------------------------------------
  // Parameters preceding the context parameter are taken over unchanged.
  FunctionArgList Args;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), ContextParamPos));

  // One implicit parameter per captured field replaces the context parameter.
  auto CapIt = S.captures().begin();
  for (auto *Field : RD->fields()) {
    QualType ParamTy = Field->getType();

    // Non-pointer by-copy captures and VLA sizes travel as uintptr values;
    // the original rationale is that the runtime library is only able to
    // deal with pointers.
    const bool PassAsUIntPtr =
        CapIt->capturesVariableArrayType() ||
        (CapIt->capturesVariableByCopy() && !ParamTy->isAnyPointerType());
    if (PassAsUIntPtr)
      ParamTy = Ctx.getUIntPtrType();

    // Name the parameter after the captured variable, or use a fixed name
    // for 'this' and for VLA sizes.
    IdentifierInfo *ParamName = nullptr;
    if (CapIt->capturesVariable() || CapIt->capturesVariableByCopy()) {
      ParamName = CapIt->getCapturedVar()->getIdentifier();
    } else if (CapIt->capturesThis()) {
      ParamName = &getContext().Idents.get("this");
    } else {
      assert(CapIt->capturesVariableArrayType());
      ParamName = &getContext().Idents.get("vla");
    }

    if (ParamTy->isVariablyModifiedType())
      ParamTy = getContext().getVariableArrayDecayedType(ParamTy);

    Args.push_back(ImplicitParamDecl::Create(getContext(), nullptr,
                                             Field->getLocation(), ParamName,
                                             ParamTy));
    ++CapIt;
  }

  // Parameters following the context parameter are taken over unchanged.
  Args.append(std::next(CD->param_begin(), ContextParamPos + 1),
              CD->param_end());

  // --- Function declaration -----------------------------------------------
  FunctionType::ExtInfo ExtInfo;
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeFreeFunctionDeclaration(Ctx.VoidTy, Args, ExtInfo,
                                                    /*IsVariadic=*/false);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  llvm::Function *F = llvm::Function::Create(
      FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
      CapturedStmtInfo->getHelperName(), &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->addFnAttr(llvm::Attribute::NoUnwind);

  // --- Function body ------------------------------------------------------
  StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(),
                CD->getBody()->getLocStart());

  // Bind every captured entity to its incoming argument. ArgIdx starts at the
  // slot the context parameter would have occupied and advances together
  // with the field and capture iterators.
  unsigned ArgIdx = ContextParamPos;
  CapIt = S.captures().begin();
  for (auto FieldIt = RD->field_begin(), FieldEnd = RD->field_end();
       FieldIt != FieldEnd; ++FieldIt, ++ArgIdx, ++CapIt) {
    const QualType FieldTy = FieldIt->getType();

    // A pointer captured by copy needs no conversion: the argument's own
    // storage serves as the variable's storage.
    if (CapIt->capturesVariableByCopy() && FieldTy->isAnyPointerType()) {
      setAddrOfLocalVar(CapIt->getCapturedVar(),
                        GetAddrOfLocalVar(Args[ArgIdx]));
      continue;
    }

    LValue ArgLVal =
        MakeAddrLValue(GetAddrOfLocalVar(Args[ArgIdx]),
                       Args[ArgIdx]->getType(), AlignmentSource::Decl);

    if (FieldIt->hasCapturedVLAType()) {
      // VLA size: view the uintptr argument as the field type, load it, and
      // record the value for the size expression.
      Address CastedAddr = castValueFromUintptr(
          *this, FieldTy, Args[ArgIdx]->getName(), ArgLVal);
      LValue CastedArgLVal =
          MakeAddrLValue(CastedAddr, FieldTy, AlignmentSource::Decl);
      llvm::Value *SizeVal =
          EmitLoadOfLValue(CastedArgLVal, SourceLocation()).getScalarVal();
      const auto *VAT = FieldIt->getCapturedVLAType();
      VLASizeMap[VAT->getSizeExpr()] = SizeVal;
    } else if (CapIt->capturesVariable()) {
      // By-reference capture: for a variable that is not itself a reference,
      // load through the reference-typed argument to reach the variable's
      // address; otherwise the argument's own address is used.
      auto *Var = CapIt->getCapturedVar();
      Address VarAddr = ArgLVal.getAddress();
      if (!Var->getType()->isReferenceType())
        VarAddr = EmitLoadOfReference(
            VarAddr, ArgLVal.getType()->castAs<ReferenceType>());
      setAddrOfLocalVar(
          Var, Address(VarAddr.getPointer(), getContext().getDeclAlign(Var)));
    } else if (CapIt->capturesVariableByCopy()) {
      // Non-pointer by-copy capture: the value arrived as a uintptr and is
      // reinterpreted as the field type.
      assert(!FieldTy->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      auto *Var = CapIt->getCapturedVar();
      const bool VarIsReference = Var->getType()->isReferenceType();
      setAddrOfLocalVar(Var, castValueFromUintptr(*this, FieldTy,
                                                  Args[ArgIdx]->getName(),
                                                  ArgLVal, VarIsReference));
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(CapIt->capturesThis());
      CXXThisValue =
          EmitLoadOfLValue(ArgLVal, Args[ArgIdx]->getLocation())
              .getScalarVal();
    }
  }

  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  FinishFunction(CD->getBodyRBrace());

  return F;
}
|
|
|
|
|
2014-05-06 18:08:46 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// OpenMP Directive Emission
|
|
|
|
//===----------------------------------------------------------------------===//
|
2015-04-14 13:11:24 +08:00
|
|
|
void CodeGenFunction::EmitOMPAggregateAssign(
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address DestAddr, Address SrcAddr, QualType OriginalType,
|
|
|
|
const llvm::function_ref<void(Address, Address)> &CopyGen) {
|
2015-04-14 13:11:24 +08:00
|
|
|
// Perform element-by-element initialization.
|
|
|
|
QualType ElementTy;
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
|
|
|
|
// Drill down to the base element type on both arrays.
|
2015-04-14 13:11:24 +08:00
|
|
|
auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
|
|
|
|
SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
|
|
|
|
|
|
|
|
auto SrcBegin = SrcAddr.getPointer();
|
|
|
|
auto DestBegin = DestAddr.getPointer();
|
2015-04-14 13:11:24 +08:00
|
|
|
// Cast from pointer to array type to pointer to single element.
|
|
|
|
auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
|
|
|
|
// The basic structure here is a while-do loop.
|
|
|
|
auto BodyBB = createBasicBlock("omp.arraycpy.body");
|
|
|
|
auto DoneBB = createBasicBlock("omp.arraycpy.done");
|
|
|
|
auto IsEmpty =
|
|
|
|
Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
|
|
|
|
Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
|
|
|
|
|
|
|
|
// Enter the loop body, making that address the current address.
|
|
|
|
auto EntryBB = Builder.GetInsertBlock();
|
|
|
|
EmitBlock(BodyBB);
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
|
|
|
|
CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);
|
|
|
|
|
|
|
|
llvm::PHINode *SrcElementPHI =
|
|
|
|
Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
|
|
|
|
SrcElementPHI->addIncoming(SrcBegin, EntryBB);
|
|
|
|
Address SrcElementCurrent =
|
|
|
|
Address(SrcElementPHI,
|
|
|
|
SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
|
|
|
|
|
|
|
|
llvm::PHINode *DestElementPHI =
|
|
|
|
Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
|
|
|
|
DestElementPHI->addIncoming(DestBegin, EntryBB);
|
|
|
|
Address DestElementCurrent =
|
|
|
|
Address(DestElementPHI,
|
|
|
|
DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
|
2015-04-14 13:11:24 +08:00
|
|
|
|
|
|
|
// Emit copy.
|
|
|
|
CopyGen(DestElementCurrent, SrcElementCurrent);
|
|
|
|
|
|
|
|
// Shift the address forward by one element.
|
|
|
|
auto DestElementNext = Builder.CreateConstGEP1_32(
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
|
2015-04-14 13:11:24 +08:00
|
|
|
auto SrcElementNext = Builder.CreateConstGEP1_32(
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
|
2015-04-14 13:11:24 +08:00
|
|
|
// Check whether we've reached the end.
|
|
|
|
auto Done =
|
|
|
|
Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
|
|
|
|
Builder.CreateCondBr(Done, DoneBB, BodyBB);
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
|
|
|
|
SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());
|
2015-04-14 13:11:24 +08:00
|
|
|
|
|
|
|
// Done.
|
|
|
|
EmitBlock(DoneBB, /*IsFinished=*/true);
|
|
|
|
}
|
2014-10-08 22:01:46 +08:00
|
|
|
|
2015-10-08 17:10:53 +08:00
|
|
|
/// \brief Emit initialization of arrays of complex types.
|
|
|
|
/// \param DestAddr Address of the array.
|
|
|
|
/// \param Type Type of array.
|
|
|
|
/// \param Init Initial expression of array.
|
|
|
|
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
|
|
|
|
QualType Type, const Expr *Init) {
|
|
|
|
// Perform element-by-element initialization.
|
|
|
|
QualType ElementTy;
|
|
|
|
|
|
|
|
// Drill down to the base element type on both arrays.
|
|
|
|
auto ArrayTy = Type->getAsArrayTypeUnsafe();
|
|
|
|
auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
|
|
|
|
DestAddr =
|
|
|
|
CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
|
|
|
|
|
|
|
|
auto DestBegin = DestAddr.getPointer();
|
|
|
|
// Cast from pointer to array type to pointer to single element.
|
|
|
|
auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
|
|
|
|
// The basic structure here is a while-do loop.
|
|
|
|
auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
|
|
|
|
auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
|
|
|
|
auto IsEmpty =
|
|
|
|
CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
|
|
|
|
CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
|
|
|
|
|
|
|
|
// Enter the loop body, making that address the current address.
|
|
|
|
auto EntryBB = CGF.Builder.GetInsertBlock();
|
|
|
|
CGF.EmitBlock(BodyBB);
|
|
|
|
|
|
|
|
CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
|
|
|
|
|
|
|
|
llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
|
|
|
|
DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
|
|
|
|
DestElementPHI->addIncoming(DestBegin, EntryBB);
|
|
|
|
Address DestElementCurrent =
|
|
|
|
Address(DestElementPHI,
|
|
|
|
DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
|
|
|
|
|
|
|
|
// Emit copy.
|
|
|
|
{
|
|
|
|
CodeGenFunction::RunCleanupsScope InitScope(CGF);
|
|
|
|
CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
|
|
|
|
/*IsInitializer=*/false);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Shift the address forward by one element.
|
|
|
|
auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
|
|
|
|
DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
|
|
|
|
// Check whether we've reached the end.
|
|
|
|
auto Done =
|
|
|
|
CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
|
|
|
|
CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
|
|
|
|
DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
|
|
|
|
|
|
|
|
// Done.
|
|
|
|
CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
|
|
|
|
}
|
|
|
|
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
|
|
|
|
Address SrcAddr, const VarDecl *DestVD,
|
2015-04-14 13:11:24 +08:00
|
|
|
const VarDecl *SrcVD, const Expr *Copy) {
|
|
|
|
if (OriginalType->isArrayType()) {
|
|
|
|
auto *BO = dyn_cast<BinaryOperator>(Copy);
|
|
|
|
if (BO && BO->getOpcode() == BO_Assign) {
|
|
|
|
// Perform simple memcpy for simple copying.
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
|
2015-04-14 13:11:24 +08:00
|
|
|
} else {
|
|
|
|
// For arrays with complex element types perform element by element
|
|
|
|
// copying.
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
EmitOMPAggregateAssign(
|
2015-04-14 13:11:24 +08:00
|
|
|
DestAddr, SrcAddr, OriginalType,
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
[this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
|
2015-04-14 13:11:24 +08:00
|
|
|
// Working with the single array element, so have to remap
|
|
|
|
// destination and source variables to corresponding array
|
|
|
|
// elements.
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CodeGenFunction::OMPPrivateScope Remap(*this);
|
|
|
|
Remap.addPrivate(DestVD, [DestElement]() -> Address {
|
2015-04-14 13:11:24 +08:00
|
|
|
return DestElement;
|
|
|
|
});
|
|
|
|
Remap.addPrivate(
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
SrcVD, [SrcElement]() -> Address { return SrcElement; });
|
2015-04-14 13:11:24 +08:00
|
|
|
(void)Remap.Privatize();
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
EmitIgnoredExpr(Copy);
|
2015-04-14 13:11:24 +08:00
|
|
|
});
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
// Remap pseudo source variable to private copy.
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CodeGenFunction::OMPPrivateScope Remap(*this);
|
|
|
|
Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
|
|
|
|
Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
|
2015-04-14 13:11:24 +08:00
|
|
|
(void)Remap.Privatize();
|
|
|
|
// Emit copying of the whole variable.
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
EmitIgnoredExpr(Copy);
|
2014-10-08 22:01:46 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-04-15 12:52:20 +08:00
|
|
|
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
|
|
|
|
OMPPrivateScope &PrivateScope) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!HaveInsertPoint())
|
|
|
|
return false;
|
2015-04-15 12:52:20 +08:00
|
|
|
llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
|
2015-08-30 23:12:28 +08:00
|
|
|
for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
|
2014-10-08 22:01:46 +08:00
|
|
|
auto IRef = C->varlist_begin();
|
|
|
|
auto InitsRef = C->inits().begin();
|
|
|
|
for (auto IInit : C->private_copies()) {
|
2014-10-10 17:48:26 +08:00
|
|
|
auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
|
2015-04-15 12:52:20 +08:00
|
|
|
if (EmittedAsFirstprivate.count(OrigVD) == 0) {
|
|
|
|
EmittedAsFirstprivate.insert(OrigVD);
|
|
|
|
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
|
|
|
|
auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
|
|
|
|
bool IsRegistered;
|
|
|
|
DeclRefExpr DRE(
|
|
|
|
const_cast<VarDecl *>(OrigVD),
|
|
|
|
/*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
|
|
|
|
OrigVD) != nullptr,
|
|
|
|
(*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address OriginalAddr = EmitLValue(&DRE).getAddress();
|
2015-05-19 20:31:28 +08:00
|
|
|
QualType Type = OrigVD->getType();
|
|
|
|
if (Type->isArrayType()) {
|
2015-04-15 12:52:20 +08:00
|
|
|
// Emit VarDecl with copy init for arrays.
|
|
|
|
// Get the address of the original variable captured in current
|
|
|
|
// captured region.
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
|
2015-04-15 12:52:20 +08:00
|
|
|
auto Emission = EmitAutoVarAlloca(*VD);
|
|
|
|
auto *Init = VD->getInit();
|
|
|
|
if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
|
|
|
|
// Perform simple memcpy.
|
|
|
|
EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
|
2015-05-19 20:31:28 +08:00
|
|
|
Type);
|
2015-04-15 12:52:20 +08:00
|
|
|
} else {
|
|
|
|
EmitOMPAggregateAssign(
|
2015-05-19 20:31:28 +08:00
|
|
|
Emission.getAllocatedAddress(), OriginalAddr, Type,
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
[this, VDInit, Init](Address DestElement,
|
|
|
|
Address SrcElement) {
|
2015-04-15 12:52:20 +08:00
|
|
|
// Clean up any temporaries needed by the initialization.
|
|
|
|
RunCleanupsScope InitScope(*this);
|
|
|
|
// Emit initialization for single element.
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
setAddrOfLocalVar(VDInit, SrcElement);
|
2015-04-15 12:52:20 +08:00
|
|
|
EmitAnyExprToMem(Init, DestElement,
|
|
|
|
Init->getType().getQualifiers(),
|
|
|
|
/*IsInitializer*/ false);
|
|
|
|
LocalDeclMap.erase(VDInit);
|
|
|
|
});
|
|
|
|
}
|
|
|
|
EmitAutoVarCleanups(Emission);
|
|
|
|
return Emission.getAllocatedAddress();
|
|
|
|
});
|
|
|
|
} else {
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
|
2015-04-15 12:52:20 +08:00
|
|
|
// Emit private VarDecl with copy init.
|
|
|
|
// Remap temp VDInit variable to the address of the original
|
|
|
|
// variable
|
|
|
|
// (for proper handling of captured global variables).
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
setAddrOfLocalVar(VDInit, OriginalAddr);
|
2015-04-15 12:52:20 +08:00
|
|
|
EmitDecl(*VD);
|
|
|
|
LocalDeclMap.erase(VDInit);
|
|
|
|
return GetAddrOfLocalVar(VD);
|
|
|
|
});
|
|
|
|
}
|
|
|
|
assert(IsRegistered &&
|
|
|
|
"firstprivate var already registered as private");
|
|
|
|
// Silence the warning about unused variable.
|
|
|
|
(void)IsRegistered;
|
|
|
|
}
|
2014-10-08 22:01:46 +08:00
|
|
|
++IRef, ++InitsRef;
|
|
|
|
}
|
|
|
|
}
|
2015-04-15 12:52:20 +08:00
|
|
|
return !EmittedAsFirstprivate.empty();
|
2014-10-08 22:01:46 +08:00
|
|
|
}
|
|
|
|
|
2014-10-21 11:16:40 +08:00
|
|
|
void CodeGenFunction::EmitOMPPrivateClause(
|
|
|
|
const OMPExecutableDirective &D,
|
|
|
|
CodeGenFunction::OMPPrivateScope &PrivateScope) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!HaveInsertPoint())
|
|
|
|
return;
|
2015-04-22 20:24:45 +08:00
|
|
|
llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
|
2015-08-30 23:12:28 +08:00
|
|
|
for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
|
2014-10-21 11:16:40 +08:00
|
|
|
auto IRef = C->varlist_begin();
|
|
|
|
for (auto IInit : C->private_copies()) {
|
|
|
|
auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
|
2015-04-22 20:24:45 +08:00
|
|
|
if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
|
|
|
|
auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
|
|
|
|
bool IsRegistered =
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
PrivateScope.addPrivate(OrigVD, [&]() -> Address {
|
2015-04-22 20:24:45 +08:00
|
|
|
// Emit private VarDecl with copy init.
|
|
|
|
EmitDecl(*VD);
|
|
|
|
return GetAddrOfLocalVar(VD);
|
|
|
|
});
|
|
|
|
assert(IsRegistered && "private var already registered as private");
|
|
|
|
// Silence the warning about unused variable.
|
|
|
|
(void)IsRegistered;
|
|
|
|
}
|
2014-10-21 11:16:40 +08:00
|
|
|
++IRef;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-04-16 13:39:01 +08:00
|
|
|
bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!HaveInsertPoint())
|
|
|
|
return false;
|
2015-04-16 13:39:01 +08:00
|
|
|
// threadprivate_var1 = master_threadprivate_var1;
|
|
|
|
// operator=(threadprivate_var2, master_threadprivate_var2);
|
|
|
|
// ...
|
|
|
|
// __kmpc_barrier(&loc, global_tid);
|
|
|
|
llvm::DenseSet<const VarDecl *> CopiedVars;
|
|
|
|
llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
|
2015-08-30 23:12:28 +08:00
|
|
|
for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
|
2015-04-16 13:39:01 +08:00
|
|
|
auto IRef = C->varlist_begin();
|
|
|
|
auto ISrcRef = C->source_exprs().begin();
|
|
|
|
auto IDestRef = C->destination_exprs().begin();
|
|
|
|
for (auto *AssignOp : C->assignment_ops()) {
|
|
|
|
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
|
2015-05-19 20:31:28 +08:00
|
|
|
QualType Type = VD->getType();
|
2015-04-16 13:39:01 +08:00
|
|
|
if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
|
2015-07-28 00:38:06 +08:00
|
|
|
|
|
|
|
// Get the address of the master variable. If we are emitting code with
|
|
|
|
// TLS support, the address is passed from the master as field in the
|
|
|
|
// captured declaration.
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address MasterAddr = Address::invalid();
|
2015-07-28 00:38:06 +08:00
|
|
|
if (getLangOpts().OpenMPUseTLS &&
|
|
|
|
getContext().getTargetInfo().isTLSSupported()) {
|
|
|
|
assert(CapturedStmtInfo->lookup(VD) &&
|
|
|
|
"Copyin threadprivates should have been captured!");
|
|
|
|
DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
|
|
|
|
VK_LValue, (*IRef)->getExprLoc());
|
|
|
|
MasterAddr = EmitLValue(&DRE).getAddress();
|
2015-09-10 16:12:02 +08:00
|
|
|
LocalDeclMap.erase(VD);
|
2015-07-28 00:38:06 +08:00
|
|
|
} else {
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
MasterAddr =
|
|
|
|
Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
|
|
|
|
: CGM.GetAddrOfGlobal(VD),
|
|
|
|
getContext().getDeclAlign(VD));
|
2015-07-28 00:38:06 +08:00
|
|
|
}
|
2015-04-16 13:39:01 +08:00
|
|
|
// Get the address of the threadprivate variable.
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address PrivateAddr = EmitLValue(*IRef).getAddress();
|
2015-04-16 13:39:01 +08:00
|
|
|
if (CopiedVars.size() == 1) {
|
|
|
|
// At first check if current thread is a master thread. If it is, no
|
|
|
|
// need to copy data.
|
|
|
|
CopyBegin = createBasicBlock("copyin.not.master");
|
|
|
|
CopyEnd = createBasicBlock("copyin.not.master.end");
|
|
|
|
Builder.CreateCondBr(
|
|
|
|
Builder.CreateICmpNE(
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
|
|
|
|
Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy)),
|
2015-04-16 13:39:01 +08:00
|
|
|
CopyBegin, CopyEnd);
|
|
|
|
EmitBlock(CopyBegin);
|
|
|
|
}
|
|
|
|
auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
|
|
|
|
auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
|
2015-04-16 13:39:01 +08:00
|
|
|
}
|
|
|
|
++IRef;
|
|
|
|
++ISrcRef;
|
|
|
|
++IDestRef;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (CopyEnd) {
|
|
|
|
// Exit out of copying procedure for non-master thread.
|
|
|
|
EmitBlock(CopyEnd, /*IsFinished=*/true);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2015-04-16 12:54:05 +08:00
|
|
|
bool CodeGenFunction::EmitOMPLastprivateClauseInit(
|
|
|
|
const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!HaveInsertPoint())
|
|
|
|
return false;
|
2015-04-16 12:54:05 +08:00
|
|
|
bool HasAtLeastOneLastprivate = false;
|
|
|
|
llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
|
2015-08-30 23:12:28 +08:00
|
|
|
for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
|
2015-05-13 18:23:02 +08:00
|
|
|
HasAtLeastOneLastprivate = true;
|
2015-04-16 12:54:05 +08:00
|
|
|
auto IRef = C->varlist_begin();
|
|
|
|
auto IDestRef = C->destination_exprs().begin();
|
|
|
|
for (auto *IInit : C->private_copies()) {
|
|
|
|
// Keep the address of the original variable for future update at the end
|
|
|
|
// of the loop.
|
|
|
|
auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
|
|
|
|
if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
|
|
|
|
auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
|
2015-04-16 12:54:05 +08:00
|
|
|
DeclRefExpr DRE(
|
|
|
|
const_cast<VarDecl *>(OrigVD),
|
|
|
|
/*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
|
|
|
|
OrigVD) != nullptr,
|
|
|
|
(*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
|
|
|
|
return EmitLValue(&DRE).getAddress();
|
|
|
|
});
|
|
|
|
// Check if the variable is also a firstprivate: in this case IInit is
|
|
|
|
// not generated. Initialization of this variable will happen in codegen
|
|
|
|
// for 'firstprivate' clause.
|
2015-05-13 18:23:02 +08:00
|
|
|
if (IInit) {
|
|
|
|
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
|
|
|
|
bool IsRegistered =
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
PrivateScope.addPrivate(OrigVD, [&]() -> Address {
|
2015-05-13 18:23:02 +08:00
|
|
|
// Emit private VarDecl with copy init.
|
|
|
|
EmitDecl(*VD);
|
|
|
|
return GetAddrOfLocalVar(VD);
|
|
|
|
});
|
|
|
|
assert(IsRegistered &&
|
|
|
|
"lastprivate var already registered as private");
|
|
|
|
(void)IsRegistered;
|
|
|
|
}
|
2015-04-16 12:54:05 +08:00
|
|
|
}
|
|
|
|
++IRef, ++IDestRef;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return HasAtLeastOneLastprivate;
|
|
|
|
}
|
|
|
|
|
|
|
|
void CodeGenFunction::EmitOMPLastprivateClauseFinal(
|
|
|
|
const OMPExecutableDirective &D, llvm::Value *IsLastIterCond) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!HaveInsertPoint())
|
|
|
|
return;
|
2015-04-16 12:54:05 +08:00
|
|
|
// Emit following code:
|
|
|
|
// if (<IsLastIterCond>) {
|
|
|
|
// orig_var1 = private_orig_var1;
|
|
|
|
// ...
|
|
|
|
// orig_varn = private_orig_varn;
|
|
|
|
// }
|
2015-06-16 21:14:42 +08:00
|
|
|
llvm::BasicBlock *ThenBB = nullptr;
|
|
|
|
llvm::BasicBlock *DoneBB = nullptr;
|
|
|
|
if (IsLastIterCond) {
|
|
|
|
ThenBB = createBasicBlock(".omp.lastprivate.then");
|
|
|
|
DoneBB = createBasicBlock(".omp.lastprivate.done");
|
|
|
|
Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
|
|
|
|
EmitBlock(ThenBB);
|
|
|
|
}
|
2015-05-21 15:59:51 +08:00
|
|
|
llvm::DenseMap<const Decl *, const Expr *> LoopCountersAndUpdates;
|
|
|
|
const Expr *LastIterVal = nullptr;
|
|
|
|
const Expr *IVExpr = nullptr;
|
|
|
|
const Expr *IncExpr = nullptr;
|
|
|
|
if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
|
2015-06-16 21:14:42 +08:00
|
|
|
if (isOpenMPWorksharingDirective(D.getDirectiveKind())) {
|
|
|
|
LastIterVal = cast<VarDecl>(cast<DeclRefExpr>(
|
|
|
|
LoopDirective->getUpperBoundVariable())
|
|
|
|
->getDecl())
|
|
|
|
->getAnyInitializer();
|
|
|
|
IVExpr = LoopDirective->getIterationVariable();
|
|
|
|
IncExpr = LoopDirective->getInc();
|
|
|
|
auto IUpdate = LoopDirective->updates().begin();
|
|
|
|
for (auto *E : LoopDirective->counters()) {
|
|
|
|
auto *D = cast<DeclRefExpr>(E)->getDecl()->getCanonicalDecl();
|
|
|
|
LoopCountersAndUpdates[D] = *IUpdate;
|
|
|
|
++IUpdate;
|
|
|
|
}
|
2015-05-21 15:59:51 +08:00
|
|
|
}
|
|
|
|
}
|
2015-04-16 12:54:05 +08:00
|
|
|
{
|
|
|
|
llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
|
2015-05-21 15:59:51 +08:00
|
|
|
bool FirstLCV = true;
|
2015-08-30 23:12:28 +08:00
|
|
|
for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
|
2015-04-16 12:54:05 +08:00
|
|
|
auto IRef = C->varlist_begin();
|
|
|
|
auto ISrcRef = C->source_exprs().begin();
|
|
|
|
auto IDestRef = C->destination_exprs().begin();
|
|
|
|
for (auto *AssignOp : C->assignment_ops()) {
|
|
|
|
auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
|
2015-05-19 20:31:28 +08:00
|
|
|
QualType Type = PrivateVD->getType();
|
2015-05-21 15:59:51 +08:00
|
|
|
auto *CanonicalVD = PrivateVD->getCanonicalDecl();
|
|
|
|
if (AlreadyEmittedVars.insert(CanonicalVD).second) {
|
|
|
|
// If lastprivate variable is a loop control variable for loop-based
|
|
|
|
// directive, update its value before copyin back to original
|
|
|
|
// variable.
|
|
|
|
if (auto *UpExpr = LoopCountersAndUpdates.lookup(CanonicalVD)) {
|
2015-06-16 21:14:42 +08:00
|
|
|
if (FirstLCV && LastIterVal) {
|
2015-05-21 15:59:51 +08:00
|
|
|
EmitAnyExprToMem(LastIterVal, EmitLValue(IVExpr).getAddress(),
|
|
|
|
IVExpr->getType().getQualifiers(),
|
|
|
|
/*IsInitializer=*/false);
|
|
|
|
EmitIgnoredExpr(IncExpr);
|
|
|
|
FirstLCV = false;
|
|
|
|
}
|
|
|
|
EmitIgnoredExpr(UpExpr);
|
|
|
|
}
|
2015-04-16 12:54:05 +08:00
|
|
|
auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
|
|
|
|
auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
|
|
|
|
// Get the address of the original variable.
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address OriginalAddr = GetAddrOfLocalVar(DestVD);
|
2015-04-16 12:54:05 +08:00
|
|
|
// Get the address of the private variable.
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
|
|
|
|
if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
|
2015-09-04 19:26:21 +08:00
|
|
|
PrivateAddr =
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address(Builder.CreateLoad(PrivateAddr),
|
|
|
|
getNaturalTypeAlignment(RefTy->getPointeeType()));
|
|
|
|
EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
|
2015-04-16 12:54:05 +08:00
|
|
|
}
|
|
|
|
++IRef;
|
|
|
|
++ISrcRef;
|
|
|
|
++IDestRef;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2015-06-16 21:14:42 +08:00
|
|
|
if (IsLastIterCond) {
|
|
|
|
EmitBlock(DoneBB, /*IsFinished=*/true);
|
|
|
|
}
|
2015-04-16 12:54:05 +08:00
|
|
|
}
|
|
|
|
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit code for the reduction clause. The following code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of private variable: they have private copies, but their initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
void CodeGenFunction::EmitOMPReductionClauseInit(
|
|
|
|
const OMPExecutableDirective &D,
|
|
|
|
CodeGenFunction::OMPPrivateScope &PrivateScope) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!HaveInsertPoint())
|
|
|
|
return;
|
2015-08-30 23:12:28 +08:00
|
|
|
for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
auto ILHS = C->lhs_exprs().begin();
|
|
|
|
auto IRHS = C->rhs_exprs().begin();
|
2015-10-08 17:10:53 +08:00
|
|
|
auto IPriv = C->privates().begin();
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
for (auto IRef : C->varlists()) {
|
|
|
|
auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
|
2015-10-08 17:10:53 +08:00
|
|
|
auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
|
|
|
|
auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
|
|
|
|
if (auto *OASE = dyn_cast<OMPArraySectionExpr>(IRef)) {
|
|
|
|
auto *Base = OASE->getBase()->IgnoreParenImpCasts();
|
|
|
|
while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
|
|
|
|
Base = TempOASE->getBase()->IgnoreParenImpCasts();
|
|
|
|
while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
|
|
|
|
Base = TempASE->getBase()->IgnoreParenImpCasts();
|
|
|
|
auto *DE = cast<DeclRefExpr>(Base);
|
|
|
|
auto *OrigVD = cast<VarDecl>(DE->getDecl());
|
|
|
|
auto OASELValueLB = EmitOMPArraySectionExpr(OASE);
|
|
|
|
auto OASELValueUB =
|
|
|
|
EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
|
|
|
|
auto OriginalBaseLValue = EmitLValue(DE);
|
|
|
|
auto BaseLValue = OriginalBaseLValue;
|
|
|
|
auto *Zero = Builder.getInt64(/*C=*/0);
|
|
|
|
llvm::SmallVector<llvm::Value *, 4> Indexes;
|
|
|
|
Indexes.push_back(Zero);
|
|
|
|
auto *ItemTy =
|
|
|
|
OASELValueLB.getPointer()->getType()->getPointerElementType();
|
|
|
|
auto *Ty = BaseLValue.getPointer()->getType()->getPointerElementType();
|
|
|
|
while (Ty != ItemTy) {
|
|
|
|
Indexes.push_back(Zero);
|
|
|
|
Ty = Ty->getPointerElementType();
|
|
|
|
}
|
|
|
|
BaseLValue = MakeAddrLValue(
|
|
|
|
Address(Builder.CreateInBoundsGEP(BaseLValue.getPointer(), Indexes),
|
|
|
|
OASELValueLB.getAlignment()),
|
|
|
|
OASELValueLB.getType(), OASELValueLB.getAlignmentSource());
|
|
|
|
// Store the address of the original variable associated with the LHS
|
|
|
|
// implicit variable.
|
|
|
|
PrivateScope.addPrivate(LHSVD, [this, OASELValueLB]() -> Address {
|
|
|
|
return OASELValueLB.getAddress();
|
|
|
|
});
|
|
|
|
// Emit reduction copy.
|
|
|
|
bool IsRegistered = PrivateScope.addPrivate(
|
|
|
|
OrigVD, [this, PrivateVD, BaseLValue, OASELValueLB, OASELValueUB,
|
|
|
|
OriginalBaseLValue]() -> Address {
|
|
|
|
// Emit VarDecl with copy init for arrays.
|
|
|
|
// Get the address of the original variable captured in current
|
|
|
|
// captured region.
|
|
|
|
auto *Size = Builder.CreatePtrDiff(OASELValueUB.getPointer(),
|
|
|
|
OASELValueLB.getPointer());
|
|
|
|
Size = Builder.CreateNUWAdd(
|
|
|
|
Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
|
|
|
|
CodeGenFunction::OpaqueValueMapping OpaqueMap(
|
|
|
|
*this, cast<OpaqueValueExpr>(
|
|
|
|
getContext()
|
|
|
|
.getAsVariableArrayType(PrivateVD->getType())
|
|
|
|
->getSizeExpr()),
|
|
|
|
RValue::get(Size));
|
|
|
|
EmitVariablyModifiedType(PrivateVD->getType());
|
|
|
|
auto Emission = EmitAutoVarAlloca(*PrivateVD);
|
|
|
|
auto Addr = Emission.getAllocatedAddress();
|
|
|
|
auto *Init = PrivateVD->getInit();
|
|
|
|
EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), Init);
|
|
|
|
EmitAutoVarCleanups(Emission);
|
|
|
|
// Emit private VarDecl with reduction init.
|
|
|
|
auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
|
|
|
|
OASELValueLB.getPointer());
|
|
|
|
auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
|
|
|
|
Ptr = Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
|
Ptr, OriginalBaseLValue.getPointer()->getType());
|
|
|
|
return Address(Ptr, OriginalBaseLValue.getAlignment());
|
|
|
|
});
|
|
|
|
assert(IsRegistered && "private var already registered as private");
|
|
|
|
// Silence the warning about unused variable.
|
|
|
|
(void)IsRegistered;
|
|
|
|
PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
|
|
|
|
return GetAddrOfLocalVar(PrivateVD);
|
|
|
|
});
|
|
|
|
} else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(IRef)) {
|
|
|
|
auto *Base = ASE->getBase()->IgnoreParenImpCasts();
|
|
|
|
while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
|
|
|
|
Base = TempASE->getBase()->IgnoreParenImpCasts();
|
|
|
|
auto *DE = cast<DeclRefExpr>(Base);
|
|
|
|
auto *OrigVD = cast<VarDecl>(DE->getDecl());
|
|
|
|
auto ASELValue = EmitLValue(ASE);
|
|
|
|
auto OriginalBaseLValue = EmitLValue(DE);
|
|
|
|
auto BaseLValue = OriginalBaseLValue;
|
|
|
|
auto *Zero = Builder.getInt64(/*C=*/0);
|
|
|
|
llvm::SmallVector<llvm::Value *, 4> Indexes;
|
|
|
|
Indexes.push_back(Zero);
|
|
|
|
auto *ItemTy =
|
|
|
|
ASELValue.getPointer()->getType()->getPointerElementType();
|
|
|
|
auto *Ty = BaseLValue.getPointer()->getType()->getPointerElementType();
|
|
|
|
while (Ty != ItemTy) {
|
|
|
|
Indexes.push_back(Zero);
|
|
|
|
Ty = Ty->getPointerElementType();
|
|
|
|
}
|
|
|
|
BaseLValue = MakeAddrLValue(
|
|
|
|
Address(Builder.CreateInBoundsGEP(BaseLValue.getPointer(), Indexes),
|
|
|
|
ASELValue.getAlignment()),
|
|
|
|
ASELValue.getType(), ASELValue.getAlignmentSource());
|
|
|
|
// Store the address of the original variable associated with the LHS
|
|
|
|
// implicit variable.
|
|
|
|
PrivateScope.addPrivate(LHSVD, [this, ASELValue]() -> Address {
|
|
|
|
return ASELValue.getAddress();
|
|
|
|
});
|
|
|
|
// Emit reduction copy.
|
|
|
|
bool IsRegistered = PrivateScope.addPrivate(
|
|
|
|
OrigVD, [this, PrivateVD, BaseLValue, ASELValue,
|
|
|
|
OriginalBaseLValue]() -> Address {
|
|
|
|
// Emit private VarDecl with reduction init.
|
|
|
|
EmitDecl(*PrivateVD);
|
|
|
|
auto Addr = GetAddrOfLocalVar(PrivateVD);
|
|
|
|
auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
|
|
|
|
ASELValue.getPointer());
|
|
|
|
auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
|
|
|
|
Ptr = Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
|
Ptr, OriginalBaseLValue.getPointer()->getType());
|
|
|
|
return Address(Ptr, OriginalBaseLValue.getAlignment());
|
|
|
|
});
|
|
|
|
assert(IsRegistered && "private var already registered as private");
|
|
|
|
// Silence the warning about unused variable.
|
|
|
|
(void)IsRegistered;
|
2016-01-26 20:20:39 +08:00
|
|
|
PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
|
|
|
|
return Builder.CreateElementBitCast(
|
|
|
|
GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()),
|
|
|
|
"rhs.begin");
|
2015-10-08 17:10:53 +08:00
|
|
|
});
|
|
|
|
} else {
|
|
|
|
auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
|
2016-01-26 20:20:39 +08:00
|
|
|
QualType Type = PrivateVD->getType();
|
|
|
|
if (getContext().getAsArrayType(Type)) {
|
|
|
|
// Store the address of the original variable associated with the LHS
|
|
|
|
// implicit variable.
|
2015-10-08 17:10:53 +08:00
|
|
|
DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
|
|
|
|
CapturedStmtInfo->lookup(OrigVD) != nullptr,
|
|
|
|
IRef->getType(), VK_LValue, IRef->getExprLoc());
|
2016-01-26 20:20:39 +08:00
|
|
|
Address OriginalAddr = EmitLValue(&DRE).getAddress();
|
|
|
|
PrivateScope.addPrivate(LHSVD, [this, OriginalAddr,
|
|
|
|
LHSVD]() -> Address {
|
|
|
|
return Builder.CreateElementBitCast(
|
|
|
|
OriginalAddr, ConvertTypeForMem(LHSVD->getType()),
|
|
|
|
"lhs.begin");
|
|
|
|
});
|
|
|
|
bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
|
|
|
|
if (Type->isVariablyModifiedType()) {
|
|
|
|
CodeGenFunction::OpaqueValueMapping OpaqueMap(
|
|
|
|
*this, cast<OpaqueValueExpr>(
|
|
|
|
getContext()
|
|
|
|
.getAsVariableArrayType(PrivateVD->getType())
|
|
|
|
->getSizeExpr()),
|
|
|
|
RValue::get(
|
|
|
|
getTypeSize(OrigVD->getType().getNonReferenceType())));
|
|
|
|
EmitVariablyModifiedType(Type);
|
|
|
|
}
|
|
|
|
auto Emission = EmitAutoVarAlloca(*PrivateVD);
|
|
|
|
auto Addr = Emission.getAllocatedAddress();
|
|
|
|
auto *Init = PrivateVD->getInit();
|
|
|
|
EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), Init);
|
|
|
|
EmitAutoVarCleanups(Emission);
|
|
|
|
return Emission.getAllocatedAddress();
|
|
|
|
});
|
|
|
|
assert(IsRegistered && "private var already registered as private");
|
|
|
|
// Silence the warning about unused variable.
|
|
|
|
(void)IsRegistered;
|
|
|
|
PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
|
|
|
|
return Builder.CreateElementBitCast(
|
|
|
|
GetAddrOfLocalVar(PrivateVD),
|
|
|
|
ConvertTypeForMem(RHSVD->getType()), "rhs.begin");
|
|
|
|
});
|
|
|
|
} else {
|
|
|
|
// Store the address of the original variable associated with the LHS
|
|
|
|
// implicit variable.
|
|
|
|
PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef]() -> Address {
|
|
|
|
DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
|
|
|
|
CapturedStmtInfo->lookup(OrigVD) != nullptr,
|
|
|
|
IRef->getType(), VK_LValue, IRef->getExprLoc());
|
|
|
|
return EmitLValue(&DRE).getAddress();
|
|
|
|
});
|
|
|
|
// Emit reduction copy.
|
|
|
|
bool IsRegistered =
|
|
|
|
PrivateScope.addPrivate(OrigVD, [this, PrivateVD]() -> Address {
|
|
|
|
// Emit private VarDecl with reduction init.
|
|
|
|
EmitDecl(*PrivateVD);
|
|
|
|
return GetAddrOfLocalVar(PrivateVD);
|
|
|
|
});
|
|
|
|
assert(IsRegistered && "private var already registered as private");
|
|
|
|
// Silence the warning about unused variable.
|
|
|
|
(void)IsRegistered;
|
|
|
|
PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
|
|
|
|
return GetAddrOfLocalVar(PrivateVD);
|
|
|
|
});
|
|
|
|
}
|
2015-10-08 17:10:53 +08:00
|
|
|
}
|
|
|
|
++ILHS, ++IRHS, ++IPriv;
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void CodeGenFunction::EmitOMPReductionClauseFinal(
|
|
|
|
const OMPExecutableDirective &D) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!HaveInsertPoint())
|
|
|
|
return;
|
2015-10-08 17:10:53 +08:00
|
|
|
llvm::SmallVector<const Expr *, 8> Privates;
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
llvm::SmallVector<const Expr *, 8> LHSExprs;
|
|
|
|
llvm::SmallVector<const Expr *, 8> RHSExprs;
|
|
|
|
llvm::SmallVector<const Expr *, 8> ReductionOps;
|
|
|
|
bool HasAtLeastOneReduction = false;
|
2015-08-30 23:12:28 +08:00
|
|
|
for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
HasAtLeastOneReduction = true;
|
2015-10-08 17:10:53 +08:00
|
|
|
Privates.append(C->privates().begin(), C->privates().end());
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
|
|
|
|
RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
|
|
|
|
ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
|
|
|
|
}
|
|
|
|
if (HasAtLeastOneReduction) {
|
|
|
|
// Emit nowait reduction if nowait clause is present or directive is a
|
|
|
|
// parallel directive (it always has implicit barrier).
|
|
|
|
CGM.getOpenMPRuntime().emitReduction(
|
2015-10-08 17:10:53 +08:00
|
|
|
*this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
|
2015-08-30 23:12:28 +08:00
|
|
|
D.getSingleClause<OMPNowaitClause>() ||
|
2015-06-17 14:21:39 +08:00
|
|
|
isOpenMPParallelDirective(D.getDirectiveKind()) ||
|
|
|
|
D.getDirectiveKind() == OMPD_simd,
|
|
|
|
D.getDirectiveKind() == OMPD_simd);
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
|
|
|
|
const OMPExecutableDirective &S,
|
2015-07-03 17:56:58 +08:00
|
|
|
OpenMPDirectiveKind InnermostKind,
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
const RegionCodeGenTy &CodeGen) {
|
|
|
|
auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
|
2015-09-10 16:12:02 +08:00
|
|
|
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
|
|
|
|
CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
|
2015-07-03 17:56:58 +08:00
|
|
|
S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
|
2015-08-30 23:12:28 +08:00
|
|
|
if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
|
2014-10-13 16:23:51 +08:00
|
|
|
CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
|
|
|
|
auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
|
|
|
|
/*IgnoreResultAssign*/ true);
|
2015-02-25 16:32:46 +08:00
|
|
|
CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
|
2014-10-13 16:23:51 +08:00
|
|
|
CGF, NumThreads, NumThreadsClause->getLocStart());
|
|
|
|
}
|
2015-08-30 23:12:28 +08:00
|
|
|
if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
|
2015-06-18 21:40:03 +08:00
|
|
|
CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
|
|
|
|
CGF.CGM.getOpenMPRuntime().emitProcBindClause(
|
|
|
|
CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
|
|
|
|
}
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
const Expr *IfCond = nullptr;
|
2015-09-03 16:45:56 +08:00
|
|
|
for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
|
|
|
|
if (C->getNameModifier() == OMPD_unknown ||
|
|
|
|
C->getNameModifier() == OMPD_parallel) {
|
|
|
|
IfCond = C->getCondition();
|
|
|
|
break;
|
|
|
|
}
|
[OPENMP] Codegen for 'if' clause in 'task' directive.
If condition evaluates to true, the code executes task by calling @__kmpc_omp_task() runtime function.
If condition evaluates to false, the code executes serial version of the code by executing the following code:
call void @__kmpc_omp_task_begin_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
proxy_task_entry(<gtid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
call void @__kmpc_omp_task_complete_if0(<loc>, <threadid>, <task_t_ptr, returned by @__kmpc_omp_task_alloc()>);
Also it checks if the condition is constant and if it is constant it evaluates its value and then generates either parallel version of the code (if the condition evaluates to true), or the serial version of the code (if the condition evaluates to false).
Differential Revision: http://reviews.llvm.org/D9143
llvm-svn: 235507
2015-04-22 21:57:31 +08:00
|
|
|
}
|
2015-02-25 16:32:46 +08:00
|
|
|
CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
|
2015-09-10 16:12:02 +08:00
|
|
|
CapturedVars, IfCond);
|
2015-04-10 12:50:10 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
|
|
|
|
LexicalScope Scope(*this, S.getSourceRange());
|
|
|
|
// Emit parallel region as a standalone region.
|
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
|
|
|
|
OMPPrivateScope PrivateScope(CGF);
|
2015-04-16 13:39:01 +08:00
|
|
|
bool Copyins = CGF.EmitOMPCopyinClause(S);
|
|
|
|
bool Firstprivates = CGF.EmitOMPFirstprivateClause(S, PrivateScope);
|
|
|
|
if (Copyins || Firstprivates) {
|
2015-04-15 12:52:20 +08:00
|
|
|
// Emit implicit barrier to synchronize threads and avoid data races on
|
2015-04-16 13:39:01 +08:00
|
|
|
// initialization of firstprivate variables or propagation master's thread
|
|
|
|
// values of threadprivate variables to local instances of that variables
|
|
|
|
// of all other implicit threads.
|
2015-09-15 20:52:43 +08:00
|
|
|
CGF.CGM.getOpenMPRuntime().emitBarrierCall(
|
|
|
|
CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
|
|
|
|
/*ForceSimpleCall=*/true);
|
2015-04-15 12:52:20 +08:00
|
|
|
}
|
|
|
|
CGF.EmitOMPPrivateClause(S, PrivateScope);
|
|
|
|
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
|
|
|
|
(void)PrivateScope.Privatize();
|
2015-04-10 12:50:10 +08:00
|
|
|
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
CGF.EmitOMPReductionClauseFinal(S);
|
2015-04-10 12:50:10 +08:00
|
|
|
};
|
2015-07-03 17:56:58 +08:00
|
|
|
emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen);
|
2014-05-06 18:08:46 +08:00
|
|
|
}
|
2014-05-22 16:54:05 +08:00
|
|
|
|
2015-07-02 12:17:07 +08:00
|
|
|
void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
|
|
|
|
JumpDest LoopExit) {
|
2014-10-01 14:03:56 +08:00
|
|
|
RunCleanupsScope BodyScope(*this);
|
|
|
|
// Update counters values on current iteration.
|
2015-06-18 12:45:29 +08:00
|
|
|
for (auto I : D.updates()) {
|
2014-10-01 14:03:56 +08:00
|
|
|
EmitIgnoredExpr(I);
|
|
|
|
}
|
2015-03-21 18:12:56 +08:00
|
|
|
// Update the linear variables.
|
2015-08-30 23:12:28 +08:00
|
|
|
for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
|
2015-03-21 18:12:56 +08:00
|
|
|
for (auto U : C->updates()) {
|
|
|
|
EmitIgnoredExpr(U);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-10-01 14:03:56 +08:00
|
|
|
// On a continue in the body, jump to the end.
|
2014-10-07 16:57:09 +08:00
|
|
|
auto Continue = getJumpDestInCurrentScope("omp.body.continue");
|
2015-07-02 12:17:07 +08:00
|
|
|
BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
|
2014-10-01 14:03:56 +08:00
|
|
|
// Emit loop body.
|
2015-06-18 12:45:29 +08:00
|
|
|
EmitStmt(D.getBody());
|
2014-10-01 14:03:56 +08:00
|
|
|
// The end (updates/cleanups).
|
|
|
|
EmitBlock(Continue.getBlock());
|
|
|
|
BreakContinueStack.pop_back();
|
|
|
|
// TODO: Update lastprivates if the SeparateIter flag is true.
|
|
|
|
// This will be implemented in a follow-up OMPLastprivateClause patch, but
|
|
|
|
// result should be still correct without it, as we do not make these
|
|
|
|
// variables private yet.
|
|
|
|
}
|
|
|
|
|
2015-04-10 12:50:10 +08:00
|
|
|
void CodeGenFunction::EmitOMPInnerLoop(
|
|
|
|
const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
|
|
|
|
const Expr *IncExpr,
|
2015-04-22 19:15:40 +08:00
|
|
|
const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
|
|
|
|
const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
|
2014-10-07 16:57:09 +08:00
|
|
|
auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
|
2014-10-01 14:03:56 +08:00
|
|
|
|
|
|
|
// Start the loop with a block that tests the condition.
|
2014-10-07 16:57:09 +08:00
|
|
|
auto CondBlock = createBasicBlock("omp.inner.for.cond");
|
2014-10-01 14:03:56 +08:00
|
|
|
EmitBlock(CondBlock);
|
|
|
|
LoopStack.push(CondBlock);
|
|
|
|
|
|
|
|
// If there are any cleanups between here and the loop-exit scope,
|
|
|
|
// create a block to stage a loop exit along.
|
|
|
|
auto ExitBlock = LoopExit.getBlock();
|
[OPENMP] Initial codegen for 'omp sections' and 'omp section' directives.
If only one section is found in the sections region, it is emitted just like single region.
Otherwise it is emitted as a static non-chunked loop.
#pragma omp sections
{
#pragma omp section
{1}
...
#pragma omp section
{n}
}
is translated to something like
i32 <iter_var>
i32 <last_iter> = 0
i32 <lower_bound> = 0
i32 <upper_bound> = n-1
i32 <stride> = 1
call void @__kmpc_for_static_init_4(<loc>, i32 <gtid>, i32 34/*static non-chunked*/, i32* <last_iter>, i32* <lower_bound>, i32* <upper_bound>, i32* <stride>, i32 1/*increment always 1*/, i32 1/*chunk always 1*/)
<upper_bound> = min(<upper_bound>, n-1)
<iter_var> = <lb>
check:
br <iter_var> <= <upper_bound>, label cont, label exit
continue:
switch (IV) {
case 0:
{1};
break;
...
case <NumSection> - 1:
{n};
break;
}
++<iter_var>
br label check
exit:
call void @__kmpc_for_static_fini(<loc>, i32 <gtid>)
Differential Revision: http://reviews.llvm.org/D8244
llvm-svn: 232021
2015-03-12 16:53:29 +08:00
|
|
|
if (RequiresCleanup)
|
2014-10-07 16:57:09 +08:00
|
|
|
ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
|
2014-10-01 14:03:56 +08:00
|
|
|
|
2014-10-07 16:57:09 +08:00
|
|
|
auto LoopBody = createBasicBlock("omp.inner.for.body");
|
2014-10-01 14:03:56 +08:00
|
|
|
|
[OPENMP] Initial codegen for 'omp sections' and 'omp section' directives.
If only one section is found in the sections region, it is emitted just like single region.
Otherwise it is emitted as a static non-chunked loop.
#pragma omp sections
{
#pragma omp section
{1}
...
#pragma omp section
{n}
}
is translated to something like
i32 <iter_var>
i32 <last_iter> = 0
i32 <lower_bound> = 0
i32 <upper_bound> = n-1
i32 <stride> = 1
call void @__kmpc_for_static_init_4(<loc>, i32 <gtid>, i32 34/*static non-chunked*/, i32* <last_iter>, i32* <lower_bound>, i32* <upper_bound>, i32* <stride>, i32 1/*increment always 1*/, i32 1/*chunk always 1*/)
<upper_bound> = min(<upper_bound>, n-1)
<iter_var> = <lb>
check:
br <iter_var> <= <upper_bound>, label cont, label exit
continue:
switch (IV) {
case 0:
{1};
break;
...
case <NumSection> - 1:
{n};
break;
}
++<iter_var>
br label check
exit:
call void @__kmpc_for_static_fini(<loc>, i32 <gtid>)
Differential Revision: http://reviews.llvm.org/D8244
llvm-svn: 232021
2015-03-12 16:53:29 +08:00
|
|
|
// Emit condition.
|
2015-04-24 07:06:47 +08:00
|
|
|
EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
|
2014-10-01 14:03:56 +08:00
|
|
|
if (ExitBlock != LoopExit.getBlock()) {
|
|
|
|
EmitBlock(ExitBlock);
|
|
|
|
EmitBranchThroughCleanup(LoopExit);
|
|
|
|
}
|
|
|
|
|
|
|
|
EmitBlock(LoopBody);
|
2015-04-24 07:06:47 +08:00
|
|
|
incrementProfileCounter(&S);
|
2014-10-01 14:03:56 +08:00
|
|
|
|
|
|
|
// Create a block for the increment.
|
2014-10-07 16:57:09 +08:00
|
|
|
auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
|
2014-10-01 14:03:56 +08:00
|
|
|
BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
|
|
|
|
|
2015-04-10 12:50:10 +08:00
|
|
|
BodyGen(*this);
|
2014-10-01 14:03:56 +08:00
|
|
|
|
|
|
|
// Emit "IV = IV + 1" and a back-edge to the condition block.
|
|
|
|
EmitBlock(Continue.getBlock());
|
[OPENMP] Initial codegen for 'omp sections' and 'omp section' directives.
If only one section is found in the sections region, it is emitted just like single region.
Otherwise it is emitted as a static non-chunked loop.
#pragma omp sections
{
#pragma omp section
{1}
...
#pragma omp section
{n}
}
is translated to something like
i32 <iter_var>
i32 <last_iter> = 0
i32 <lower_bound> = 0
i32 <upper_bound> = n-1
i32 <stride> = 1
call void @__kmpc_for_static_init_4(<loc>, i32 <gtid>, i32 34/*static non-chunked*/, i32* <last_iter>, i32* <lower_bound>, i32* <upper_bound>, i32* <stride>, i32 1/*increment always 1*/, i32 1/*chunk always 1*/)
<upper_bound> = min(<upper_bound>, n-1)
<iter_var> = <lb>
check:
br <iter_var> <= <upper_bound>, label cont, label exit
continue:
switch (IV) {
case 0:
{1};
break;
...
case <NumSection> - 1:
{n};
break;
}
++<iter_var>
br label check
exit:
call void @__kmpc_for_static_fini(<loc>, i32 <gtid>)
Differential Revision: http://reviews.llvm.org/D8244
llvm-svn: 232021
2015-03-12 16:53:29 +08:00
|
|
|
EmitIgnoredExpr(IncExpr);
|
2015-04-22 19:15:40 +08:00
|
|
|
PostIncGen(*this);
|
2014-10-01 14:03:56 +08:00
|
|
|
BreakContinueStack.pop_back();
|
|
|
|
EmitBranch(CondBlock);
|
|
|
|
LoopStack.pop();
|
|
|
|
// Emit the fall-through block.
|
|
|
|
EmitBlock(LoopExit.getBlock());
|
|
|
|
}
|
|
|
|
|
2015-06-18 18:10:12 +08:00
|
|
|
void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!HaveInsertPoint())
|
|
|
|
return;
|
2015-06-17 15:45:51 +08:00
|
|
|
// Emit inits for the linear variables.
|
2015-08-30 23:12:28 +08:00
|
|
|
for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
|
2015-06-17 15:45:51 +08:00
|
|
|
for (auto Init : C->inits()) {
|
|
|
|
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
|
2015-06-18 18:10:12 +08:00
|
|
|
auto *OrigVD = cast<VarDecl>(
|
|
|
|
cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())->getDecl());
|
|
|
|
DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
|
|
|
|
CapturedStmtInfo->lookup(OrigVD) != nullptr,
|
|
|
|
VD->getInit()->getType(), VK_LValue,
|
|
|
|
VD->getInit()->getExprLoc());
|
|
|
|
AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
|
|
|
|
EmitExprAsInit(&DRE, VD,
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
MakeAddrLValue(Emission.getAllocatedAddress(), VD->getType()),
|
2015-06-18 18:10:12 +08:00
|
|
|
/*capturedByInit=*/false);
|
|
|
|
EmitAutoVarCleanups(Emission);
|
2014-10-01 14:03:56 +08:00
|
|
|
}
|
2015-06-17 15:45:51 +08:00
|
|
|
// Emit the linear steps for the linear clauses.
|
|
|
|
// If a step is not constant, it is pre-calculated before the loop.
|
|
|
|
if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
|
|
|
|
if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
|
2015-06-18 18:10:12 +08:00
|
|
|
EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
|
2015-06-17 15:45:51 +08:00
|
|
|
// Emit calculation of the linear step.
|
2015-06-18 18:10:12 +08:00
|
|
|
EmitIgnoredExpr(CS);
|
2015-06-17 15:45:51 +08:00
|
|
|
}
|
2014-10-01 14:03:56 +08:00
|
|
|
}
|
2015-06-17 15:45:51 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void emitLinearClauseFinal(CodeGenFunction &CGF,
|
|
|
|
const OMPLoopDirective &D) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!CGF.HaveInsertPoint())
|
|
|
|
return;
|
2015-03-21 18:12:56 +08:00
|
|
|
// Emit the final values of the linear variables.
|
2015-08-30 23:12:28 +08:00
|
|
|
for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
|
2015-05-08 18:41:21 +08:00
|
|
|
auto IC = C->varlist_begin();
|
2015-03-21 18:12:56 +08:00
|
|
|
for (auto F : C->finals()) {
|
2015-05-08 18:41:21 +08:00
|
|
|
auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
|
|
|
|
DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
|
2015-06-17 15:45:51 +08:00
|
|
|
CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
|
2015-05-08 18:41:21 +08:00
|
|
|
(*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address OrigAddr = CGF.EmitLValue(&DRE).getAddress();
|
2015-06-17 15:45:51 +08:00
|
|
|
CodeGenFunction::OMPPrivateScope VarScope(CGF);
|
2015-05-08 18:41:21 +08:00
|
|
|
VarScope.addPrivate(OrigVD,
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
[OrigAddr]() -> Address { return OrigAddr; });
|
2015-05-08 18:41:21 +08:00
|
|
|
(void)VarScope.Privatize();
|
2015-06-17 15:45:51 +08:00
|
|
|
CGF.EmitIgnoredExpr(F);
|
2015-05-08 18:41:21 +08:00
|
|
|
++IC;
|
2015-03-21 18:12:56 +08:00
|
|
|
}
|
|
|
|
}
|
2014-10-01 14:03:56 +08:00
|
|
|
}
|
|
|
|
|
2015-06-17 15:45:51 +08:00
|
|
|
/// Emit alignment assumptions for the variables of all 'aligned' clauses of
/// directive \a D.
///
/// When a clause has an explicit alignment expression, its value (which must
/// emit as an llvm::ConstantInt) is applied to every variable of the clause.
/// Otherwise the default SIMD alignment reported by the AST context for the
/// variable's pointee type is used. Variables whose resulting alignment is
/// zero get no assumption. Nothing is emitted when there is no insertion
/// point.
static void emitAlignedClause(CodeGenFunction &CGF,
                              const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;

  for (const auto *AlignedClause : D.getClausesOfKind<OMPAlignedClause>()) {
    // Alignment explicitly requested by the clause; 0 means "not specified".
    unsigned ExplicitAlignment = 0;
    if (auto AlignmentExpr = AlignedClause->getAlignment()) {
      auto AlignmentCI =
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
      ExplicitAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
    }

    for (auto PtrExpr : AlignedClause->varlists()) {
      unsigned Alignment = ExplicitAlignment;
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined
        // default alignments for SIMD instructions on the target platforms
        // are assumed.
        auto &Ctx = CGF.getContext();
        Alignment =
            Ctx.toCharUnitsFromBits(Ctx.getOpenMPDefaultSimdAlign(
                                        PtrExpr->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
             "alignment is not power of 2");
      if (Alignment == 0)
        continue;

      llvm::Value *PtrValue = CGF.EmitScalarExpr(PtrExpr);
      CGF.EmitAlignmentAssumption(PtrValue, Alignment);
    }
  }
}
|
|
|
|
|
2015-06-17 15:45:51 +08:00
|
|
|
static void emitPrivateLoopCounters(CodeGenFunction &CGF,
|
2014-10-10 17:48:26 +08:00
|
|
|
CodeGenFunction::OMPPrivateScope &LoopScope,
|
2015-08-06 20:30:57 +08:00
|
|
|
ArrayRef<Expr *> Counters,
|
|
|
|
ArrayRef<Expr *> PrivateCounters) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!CGF.HaveInsertPoint())
|
|
|
|
return;
|
2015-08-06 20:30:57 +08:00
|
|
|
auto I = PrivateCounters.begin();
|
2014-10-10 17:48:26 +08:00
|
|
|
for (auto *E : Counters) {
|
2015-08-06 20:30:57 +08:00
|
|
|
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
|
|
|
|
auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address Addr = Address::invalid();
|
|
|
|
(void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
|
2014-10-10 17:48:26 +08:00
|
|
|
// Emit var without initialization.
|
2015-08-06 20:30:57 +08:00
|
|
|
auto VarEmission = CGF.EmitAutoVarAlloca(*PrivateVD);
|
2014-10-10 17:48:26 +08:00
|
|
|
CGF.EmitAutoVarCleanups(VarEmission);
|
2015-08-06 20:30:57 +08:00
|
|
|
Addr = VarEmission.getAllocatedAddress();
|
|
|
|
return Addr;
|
2014-10-10 17:48:26 +08:00
|
|
|
});
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
(void)LoopScope.addPrivate(VD, [&]() -> Address { return Addr; });
|
2015-08-06 20:30:57 +08:00
|
|
|
++I;
|
2014-10-10 17:48:26 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-04-22 19:59:37 +08:00
|
|
|
/// Emit the precondition check of loop directive \a S.
///
/// Inside a temporary private scope the loop counters are privatized and
/// the init expressions of the directive are evaluated. After that scope has
/// been left, a conditional branch on \a Cond is emitted. Nothing is emitted
/// when there is no insertion point.
///
/// \param CGF        Code generator used for emission.
/// \param S          Loop directive providing counters and inits.
/// \param Cond       Precondition expression.
/// \param TrueBlock  Branch target when \a Cond holds.
/// \param FalseBlock Branch target when \a Cond does not hold.
/// \param TrueCount  Profile count forwarded to the branch emission.
static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
                        const Expr *Cond, llvm::BasicBlock *TrueBlock,
                        llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
  if (!CGF.HaveInsertPoint())
    return;

  {
    // The privatization below is only in effect within this block.
    CodeGenFunction::OMPPrivateScope CounterScope(CGF);
    emitPrivateLoopCounters(CGF, CounterScope, S.counters(),
                            S.private_counters());
    (void)CounterScope.Privatize();

    // Get initial values of real counters.
    for (auto InitExpr : S.inits())
      CGF.EmitIgnoredExpr(InitExpr);
  }

  // Check that loop is executed at least one time.
  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
}
|
|
|
|
|
2015-03-21 18:12:56 +08:00
|
|
|
static void
|
2015-06-17 15:45:51 +08:00
|
|
|
emitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D,
|
2015-03-21 18:12:56 +08:00
|
|
|
CodeGenFunction::OMPPrivateScope &PrivateScope) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!CGF.HaveInsertPoint())
|
|
|
|
return;
|
2015-08-30 23:12:28 +08:00
|
|
|
for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
|
2015-08-18 14:47:21 +08:00
|
|
|
auto CurPrivate = C->privates().begin();
|
2015-04-27 16:00:32 +08:00
|
|
|
for (auto *E : C->varlists()) {
|
2015-08-18 14:47:21 +08:00
|
|
|
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
|
|
|
|
auto *PrivateVD =
|
|
|
|
cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
|
2015-08-18 14:47:21 +08:00
|
|
|
// Emit private VarDecl with copy init.
|
|
|
|
CGF.EmitVarDecl(*PrivateVD);
|
|
|
|
return CGF.GetAddrOfLocalVar(PrivateVD);
|
2015-03-21 18:12:56 +08:00
|
|
|
});
|
|
|
|
assert(IsRegistered && "linear var already registered as private");
|
|
|
|
// Silence the warning about unused variable.
|
|
|
|
(void)IsRegistered;
|
2015-08-18 14:47:21 +08:00
|
|
|
++CurPrivate;
|
2015-03-21 18:12:56 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-08-21 20:19:04 +08:00
|
|
|
static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
|
2015-12-31 14:52:34 +08:00
|
|
|
const OMPExecutableDirective &D,
|
|
|
|
bool IsMonotonic) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!CGF.HaveInsertPoint())
|
|
|
|
return;
|
2015-08-30 23:12:28 +08:00
|
|
|
if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
|
2015-08-21 20:19:04 +08:00
|
|
|
RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
|
|
|
|
/*ignoreResult=*/true);
|
|
|
|
llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
|
|
|
|
CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
|
|
|
|
// In presence of finite 'safelen', it may be unsafe to mark all
|
|
|
|
// the memory instructions parallel, because loop-carried
|
|
|
|
// dependences of 'safelen' iterations are possible.
|
2015-12-31 14:52:34 +08:00
|
|
|
if (!IsMonotonic)
|
|
|
|
CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
|
2015-08-30 23:12:28 +08:00
|
|
|
} else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
|
2015-06-17 15:45:51 +08:00
|
|
|
RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
|
|
|
|
/*ignoreResult=*/true);
|
|
|
|
llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
|
2015-07-15 07:03:09 +08:00
|
|
|
CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
|
2015-06-17 15:45:51 +08:00
|
|
|
// In presence of finite 'safelen', it may be unsafe to mark all
|
|
|
|
// the memory instructions parallel, because loop-carried
|
|
|
|
// dependences of 'safelen' iterations are possible.
|
|
|
|
CGF.LoopStack.setParallel(false);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-12-31 14:52:34 +08:00
|
|
|
void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
|
|
|
|
bool IsMonotonic) {
|
2015-06-18 12:45:29 +08:00
|
|
|
// Walk clauses and process safelen/lastprivate.
|
2015-12-31 14:52:34 +08:00
|
|
|
LoopStack.setParallel(!IsMonotonic);
|
2015-07-15 07:03:09 +08:00
|
|
|
LoopStack.setVectorizeEnable(true);
|
2015-12-31 14:52:34 +08:00
|
|
|
emitSimdlenSafelenClause(*this, D, IsMonotonic);
|
2015-06-18 12:45:29 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &D) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!HaveInsertPoint())
|
|
|
|
return;
|
2015-06-18 12:45:29 +08:00
|
|
|
auto IC = D.counters().begin();
|
|
|
|
for (auto F : D.finals()) {
|
|
|
|
auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD)) {
|
2015-06-18 12:45:29 +08:00
|
|
|
DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
|
|
|
|
CapturedStmtInfo->lookup(OrigVD) != nullptr,
|
|
|
|
(*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address OrigAddr = EmitLValue(&DRE).getAddress();
|
2015-06-18 12:45:29 +08:00
|
|
|
OMPPrivateScope VarScope(*this);
|
|
|
|
VarScope.addPrivate(OrigVD,
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
[OrigAddr]() -> Address { return OrigAddr; });
|
2015-06-18 12:45:29 +08:00
|
|
|
(void)VarScope.Privatize();
|
|
|
|
EmitIgnoredExpr(F);
|
|
|
|
}
|
|
|
|
++IC;
|
|
|
|
}
|
|
|
|
emitLinearClauseFinal(*this, D);
|
|
|
|
}
|
|
|
|
|
2014-05-22 16:54:05 +08:00
|
|
|
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
|
2015-04-10 12:50:10 +08:00
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
|
2015-04-22 19:59:37 +08:00
|
|
|
// if (PreCond) {
|
2015-04-10 12:50:10 +08:00
|
|
|
// for (IV in 0..LastIteration) BODY;
|
|
|
|
// <Final counter/linear vars updates>;
|
2015-04-22 19:59:37 +08:00
|
|
|
// }
|
2015-04-10 12:50:10 +08:00
|
|
|
//
|
|
|
|
|
2015-04-22 19:59:37 +08:00
|
|
|
// Emit: if (PreCond) - begin.
|
|
|
|
// If the condition constant folds and can be elided, avoid emitting the
|
|
|
|
// whole loop.
|
|
|
|
bool CondConstant;
|
|
|
|
llvm::BasicBlock *ContBlock = nullptr;
|
|
|
|
if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
|
|
|
|
if (!CondConstant)
|
|
|
|
return;
|
|
|
|
} else {
|
|
|
|
auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
|
|
|
|
ContBlock = CGF.createBasicBlock("simd.if.end");
|
2015-04-24 07:06:47 +08:00
|
|
|
emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
|
|
|
|
CGF.getProfileCount(&S));
|
2015-04-22 19:59:37 +08:00
|
|
|
CGF.EmitBlock(ThenBlock);
|
2015-04-24 07:06:47 +08:00
|
|
|
CGF.incrementProfileCounter(&S);
|
2015-04-22 19:59:37 +08:00
|
|
|
}
|
2015-03-21 18:12:56 +08:00
|
|
|
|
2015-04-10 12:50:10 +08:00
|
|
|
// Emit the loop iteration variable.
|
|
|
|
const Expr *IVExpr = S.getIterationVariable();
|
|
|
|
const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
|
|
|
|
CGF.EmitVarDecl(*IVDecl);
|
|
|
|
CGF.EmitIgnoredExpr(S.getInit());
|
|
|
|
|
|
|
|
// Emit the iterations count variable.
|
|
|
|
// If it is not a variable, Sema decided to calculate iterations count on
|
2015-05-21 15:59:51 +08:00
|
|
|
// each iteration (e.g., it is foldable into a constant).
|
2015-04-10 12:50:10 +08:00
|
|
|
if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
|
|
|
|
CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
|
|
|
|
// Emit calculation of the iterations count.
|
|
|
|
CGF.EmitIgnoredExpr(S.getCalcLastIteration());
|
|
|
|
}
|
2014-10-01 14:03:56 +08:00
|
|
|
|
2015-06-18 12:45:29 +08:00
|
|
|
CGF.EmitOMPSimdInit(S);
|
2014-10-01 14:03:56 +08:00
|
|
|
|
2015-06-18 12:45:29 +08:00
|
|
|
emitAlignedClause(CGF, S);
|
2015-06-18 18:10:12 +08:00
|
|
|
CGF.EmitOMPLinearClauseInit(S);
|
2015-06-16 21:14:42 +08:00
|
|
|
bool HasLastprivateClause;
|
2015-04-22 19:59:37 +08:00
|
|
|
{
|
|
|
|
OMPPrivateScope LoopScope(CGF);
|
2015-08-06 20:30:57 +08:00
|
|
|
emitPrivateLoopCounters(CGF, LoopScope, S.counters(),
|
|
|
|
S.private_counters());
|
2015-06-17 15:45:51 +08:00
|
|
|
emitPrivateLinearVars(CGF, S, LoopScope);
|
2015-04-22 19:59:37 +08:00
|
|
|
CGF.EmitOMPPrivateClause(S, LoopScope);
|
2015-06-17 14:21:39 +08:00
|
|
|
CGF.EmitOMPReductionClauseInit(S, LoopScope);
|
2015-06-16 21:14:42 +08:00
|
|
|
HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
|
2015-04-22 19:59:37 +08:00
|
|
|
(void)LoopScope.Privatize();
|
2015-07-02 12:17:07 +08:00
|
|
|
CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
|
|
|
|
S.getInc(),
|
2015-04-22 19:59:37 +08:00
|
|
|
[&S](CodeGenFunction &CGF) {
|
2015-07-02 12:17:07 +08:00
|
|
|
CGF.EmitOMPLoopBody(S, JumpDest());
|
2015-04-22 19:59:37 +08:00
|
|
|
CGF.EmitStopPoint(&S);
|
|
|
|
},
|
|
|
|
[](CodeGenFunction &) {});
|
2015-06-16 21:14:42 +08:00
|
|
|
// Emit final copy of the lastprivate variables at the end of loops.
|
|
|
|
if (HasLastprivateClause) {
|
|
|
|
CGF.EmitOMPLastprivateClauseFinal(S);
|
|
|
|
}
|
2015-06-17 14:21:39 +08:00
|
|
|
CGF.EmitOMPReductionClauseFinal(S);
|
2015-04-22 19:59:37 +08:00
|
|
|
}
|
2015-06-18 12:45:29 +08:00
|
|
|
CGF.EmitOMPSimdFinal(S);
|
2015-04-22 19:59:37 +08:00
|
|
|
// Emit: if (PreCond) - end.
|
|
|
|
if (ContBlock) {
|
2015-04-10 12:50:10 +08:00
|
|
|
CGF.EmitBranch(ContBlock);
|
|
|
|
CGF.EmitBlock(ContBlock, true);
|
2014-10-01 14:03:56 +08:00
|
|
|
}
|
2015-04-10 12:50:10 +08:00
|
|
|
};
|
2015-07-03 17:56:58 +08:00
|
|
|
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
|
2014-05-22 16:54:05 +08:00
|
|
|
}
|
|
|
|
|
2015-12-31 14:52:34 +08:00
|
|
|
void CodeGenFunction::EmitOMPForOuterLoop(
|
|
|
|
OpenMPScheduleClauseKind ScheduleKind, bool IsMonotonic,
|
|
|
|
const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
|
|
|
|
Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
|
2015-01-22 16:49:35 +08:00
|
|
|
auto &RT = CGM.getOpenMPRuntime();
|
2015-03-12 21:37:50 +08:00
|
|
|
|
|
|
|
// Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
|
2015-05-20 21:12:48 +08:00
|
|
|
const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind);
|
2015-03-12 21:37:50 +08:00
|
|
|
|
2015-05-20 21:12:48 +08:00
|
|
|
assert((Ordered ||
|
|
|
|
!RT.isStaticNonchunked(ScheduleKind, /*Chunked=*/Chunk != nullptr)) &&
|
2015-01-22 16:49:35 +08:00
|
|
|
"static non-chunked schedule does not need outer loop");
|
|
|
|
|
|
|
|
// Emit outer loop.
|
|
|
|
//
|
|
|
|
// OpenMP [2.7.1, Loop Construct, Description, table 2-1]
|
2015-03-12 21:37:50 +08:00
|
|
|
// When schedule(dynamic,chunk_size) is specified, the iterations are
|
|
|
|
// distributed to threads in the team in chunks as the threads request them.
|
|
|
|
// Each thread executes a chunk of iterations, then requests another chunk,
|
|
|
|
// until no chunks remain to be distributed. Each chunk contains chunk_size
|
|
|
|
// iterations, except for the last chunk to be distributed, which may have
|
|
|
|
// fewer iterations. When no chunk_size is specified, it defaults to 1.
|
|
|
|
//
|
|
|
|
// When schedule(guided,chunk_size) is specified, the iterations are assigned
|
|
|
|
// to threads in the team in chunks as the executing threads request them.
|
|
|
|
// Each thread executes a chunk of iterations, then requests another chunk,
|
|
|
|
// until no chunks remain to be assigned. For a chunk_size of 1, the size of
|
|
|
|
// each chunk is proportional to the number of unassigned iterations divided
|
|
|
|
// by the number of threads in the team, decreasing to 1. For a chunk_size
|
|
|
|
// with value k (greater than 1), the size of each chunk is determined in the
|
|
|
|
// same way, with the restriction that the chunks do not contain fewer than k
|
|
|
|
// iterations (except for the last chunk to be assigned, which may have fewer
|
|
|
|
// than k iterations).
|
|
|
|
//
|
|
|
|
// When schedule(auto) is specified, the decision regarding scheduling is
|
|
|
|
// delegated to the compiler and/or runtime system. The programmer gives the
|
|
|
|
// implementation the freedom to choose any possible mapping of iterations to
|
|
|
|
// threads in the team.
|
|
|
|
//
|
|
|
|
// When schedule(runtime) is specified, the decision regarding scheduling is
|
|
|
|
// deferred until run time, and the schedule and chunk size are taken from the
|
|
|
|
// run-sched-var ICV. If the ICV is set to auto, the schedule is
|
|
|
|
// implementation defined
|
|
|
|
//
|
|
|
|
// while(__kmpc_dispatch_next(&LB, &UB)) {
|
|
|
|
// idx = LB;
|
2015-04-22 19:15:40 +08:00
|
|
|
// while (idx <= UB) { BODY; ++idx;
|
|
|
|
// __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
|
|
|
|
// } // inner loop
|
2015-03-12 21:37:50 +08:00
|
|
|
// }
|
|
|
|
//
|
|
|
|
// OpenMP [2.7.1, Loop Construct, Description, table 2-1]
|
2015-01-22 16:49:35 +08:00
|
|
|
// When schedule(static, chunk_size) is specified, iterations are divided into
|
|
|
|
// chunks of size chunk_size, and the chunks are assigned to the threads in
|
|
|
|
// the team in a round-robin fashion in the order of the thread number.
|
|
|
|
//
|
|
|
|
// while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
|
|
|
|
// while (idx <= UB) { BODY; ++idx; } // inner loop
|
|
|
|
// LB = LB + ST;
|
|
|
|
// UB = UB + ST;
|
|
|
|
// }
|
|
|
|
//
|
2015-03-12 21:37:50 +08:00
|
|
|
|
2015-01-22 16:49:35 +08:00
|
|
|
const Expr *IVExpr = S.getIterationVariable();
|
|
|
|
const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
|
|
|
|
const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
|
|
|
|
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
if (DynamicOrOrdered) {
|
|
|
|
llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration());
|
|
|
|
RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind,
|
|
|
|
IVSize, IVSigned, Ordered, UBVal, Chunk);
|
|
|
|
} else {
|
|
|
|
RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind,
|
|
|
|
IVSize, IVSigned, Ordered, IL, LB, UB, ST, Chunk);
|
|
|
|
}
|
2015-03-12 21:37:50 +08:00
|
|
|
|
2015-01-22 16:49:35 +08:00
|
|
|
auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
|
|
|
|
|
|
|
|
// Start the loop with a block that tests the condition.
|
|
|
|
auto CondBlock = createBasicBlock("omp.dispatch.cond");
|
|
|
|
EmitBlock(CondBlock);
|
|
|
|
LoopStack.push(CondBlock);
|
|
|
|
|
|
|
|
llvm::Value *BoolCondVal = nullptr;
|
2015-05-20 21:12:48 +08:00
|
|
|
if (!DynamicOrOrdered) {
|
2015-03-12 21:37:50 +08:00
|
|
|
// UB = min(UB, GlobalUB)
|
|
|
|
EmitIgnoredExpr(S.getEnsureUpperBound());
|
|
|
|
// IV = LB
|
|
|
|
EmitIgnoredExpr(S.getInit());
|
|
|
|
// IV < UB
|
2015-06-16 19:59:36 +08:00
|
|
|
BoolCondVal = EvaluateExprAsBool(S.getCond());
|
2015-03-12 21:37:50 +08:00
|
|
|
} else {
|
|
|
|
BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned,
|
|
|
|
IL, LB, UB, ST);
|
|
|
|
}
|
2015-01-22 16:49:35 +08:00
|
|
|
|
|
|
|
// If there are any cleanups between here and the loop-exit scope,
|
|
|
|
// create a block to stage a loop exit along.
|
|
|
|
auto ExitBlock = LoopExit.getBlock();
|
|
|
|
if (LoopScope.requiresCleanups())
|
|
|
|
ExitBlock = createBasicBlock("omp.dispatch.cleanup");
|
|
|
|
|
|
|
|
auto LoopBody = createBasicBlock("omp.dispatch.body");
|
|
|
|
Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
|
|
|
|
if (ExitBlock != LoopExit.getBlock()) {
|
|
|
|
EmitBlock(ExitBlock);
|
|
|
|
EmitBranchThroughCleanup(LoopExit);
|
|
|
|
}
|
|
|
|
EmitBlock(LoopBody);
|
|
|
|
|
2015-03-12 21:37:50 +08:00
|
|
|
// Emit "IV = LB" (in case of static schedule, we have already calculated new
|
|
|
|
// LB for loop condition and emitted it above).
|
2015-05-20 21:12:48 +08:00
|
|
|
if (DynamicOrOrdered)
|
2015-03-12 21:37:50 +08:00
|
|
|
EmitIgnoredExpr(S.getInit());
|
|
|
|
|
2015-01-22 16:49:35 +08:00
|
|
|
// Create a block for the increment.
|
|
|
|
auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
|
|
|
|
BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
|
|
|
|
|
2015-06-18 12:45:29 +08:00
|
|
|
// Generate !llvm.loop.parallel metadata for loads and stores for loops
|
|
|
|
// with dynamic/guided scheduling and without ordered clause.
|
2015-12-31 14:52:34 +08:00
|
|
|
if (!isOpenMPSimdDirective(S.getDirectiveKind()))
|
|
|
|
LoopStack.setParallel(!IsMonotonic);
|
|
|
|
else
|
|
|
|
EmitOMPSimdInit(S, IsMonotonic);
|
2015-06-18 12:45:29 +08:00
|
|
|
|
2015-04-22 19:15:40 +08:00
|
|
|
SourceLocation Loc = S.getLocStart();
|
2015-07-02 12:17:07 +08:00
|
|
|
EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
|
|
|
|
[&S, LoopExit](CodeGenFunction &CGF) {
|
|
|
|
CGF.EmitOMPLoopBody(S, LoopExit);
|
|
|
|
CGF.EmitStopPoint(&S);
|
|
|
|
},
|
|
|
|
[Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) {
|
|
|
|
if (Ordered) {
|
|
|
|
CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(
|
|
|
|
CGF, Loc, IVSize, IVSigned);
|
|
|
|
}
|
|
|
|
});
|
2015-01-22 16:49:35 +08:00
|
|
|
|
|
|
|
EmitBlock(Continue.getBlock());
|
|
|
|
BreakContinueStack.pop_back();
|
2015-05-20 21:12:48 +08:00
|
|
|
if (!DynamicOrOrdered) {
|
2015-03-12 21:37:50 +08:00
|
|
|
// Emit "LB = LB + Stride", "UB = UB + Stride".
|
|
|
|
EmitIgnoredExpr(S.getNextLowerBound());
|
|
|
|
EmitIgnoredExpr(S.getNextUpperBound());
|
|
|
|
}
|
2015-01-22 16:49:35 +08:00
|
|
|
|
|
|
|
EmitBranch(CondBlock);
|
|
|
|
LoopStack.pop();
|
|
|
|
// Emit the fall-through block.
|
|
|
|
EmitBlock(LoopExit.getBlock());
|
|
|
|
|
|
|
|
// Tell the runtime we are done.
|
2015-05-20 21:12:48 +08:00
|
|
|
if (!DynamicOrOrdered)
|
2015-04-22 19:15:40 +08:00
|
|
|
RT.emitForStaticFinish(*this, S.getLocEnd());
|
2015-01-22 16:49:35 +08:00
|
|
|
}
|
|
|
|
|
2014-12-15 15:07:06 +08:00
|
|
|
/// \brief Emit a helper variable and return corresponding lvalue.
|
|
|
|
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
|
|
|
|
const DeclRefExpr *Helper) {
|
|
|
|
auto VDecl = cast<VarDecl>(Helper->getDecl());
|
|
|
|
CGF.EmitVarDecl(*VDecl);
|
|
|
|
return CGF.EmitLValue(Helper);
|
|
|
|
}
|
|
|
|
|
2015-12-31 14:52:34 +08:00
|
|
|
namespace {
|
|
|
|
struct ScheduleKindModifiersTy {
|
|
|
|
OpenMPScheduleClauseKind Kind;
|
|
|
|
OpenMPScheduleClauseModifier M1;
|
|
|
|
OpenMPScheduleClauseModifier M2;
|
|
|
|
ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
|
|
|
|
OpenMPScheduleClauseModifier M1,
|
|
|
|
OpenMPScheduleClauseModifier M2)
|
|
|
|
: Kind(Kind), M1(M1), M2(M2) {}
|
|
|
|
};
|
|
|
|
} // namespace
|
|
|
|
|
|
|
|
static std::pair<llvm::Value * /*Chunk*/, ScheduleKindModifiersTy>
|
2015-05-12 16:35:28 +08:00
|
|
|
emitScheduleClause(CodeGenFunction &CGF, const OMPLoopDirective &S,
|
|
|
|
bool OuterRegion) {
|
|
|
|
// Detect the loop schedule kind and chunk.
|
|
|
|
auto ScheduleKind = OMPC_SCHEDULE_unknown;
|
2015-12-31 14:52:34 +08:00
|
|
|
OpenMPScheduleClauseModifier M1 = OMPC_SCHEDULE_MODIFIER_unknown;
|
|
|
|
OpenMPScheduleClauseModifier M2 = OMPC_SCHEDULE_MODIFIER_unknown;
|
2015-05-12 16:35:28 +08:00
|
|
|
llvm::Value *Chunk = nullptr;
|
2015-08-30 23:12:28 +08:00
|
|
|
if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
|
2015-05-12 16:35:28 +08:00
|
|
|
ScheduleKind = C->getScheduleKind();
|
2015-12-31 14:52:34 +08:00
|
|
|
M1 = C->getFirstScheduleModifier();
|
|
|
|
M2 = C->getSecondScheduleModifier();
|
2015-05-12 16:35:28 +08:00
|
|
|
if (const auto *Ch = C->getChunkSize()) {
|
|
|
|
if (auto *ImpRef = cast_or_null<DeclRefExpr>(C->getHelperChunkSize())) {
|
|
|
|
if (OuterRegion) {
|
|
|
|
const VarDecl *ImpVar = cast<VarDecl>(ImpRef->getDecl());
|
|
|
|
CGF.EmitVarDecl(*ImpVar);
|
|
|
|
CGF.EmitStoreThroughLValue(
|
|
|
|
CGF.EmitAnyExpr(Ch),
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(ImpVar),
|
|
|
|
ImpVar->getType()));
|
2015-05-12 16:35:28 +08:00
|
|
|
} else {
|
|
|
|
Ch = ImpRef;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!C->getHelperChunkSize() || !OuterRegion) {
|
|
|
|
Chunk = CGF.EmitScalarExpr(Ch);
|
|
|
|
Chunk = CGF.EmitScalarConversion(Chunk, Ch->getType(),
|
2015-08-11 12:19:28 +08:00
|
|
|
S.getIterationVariable()->getType(),
|
|
|
|
S.getLocStart());
|
2015-05-12 16:35:28 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2015-12-31 14:52:34 +08:00
|
|
|
return std::make_pair(Chunk, ScheduleKindModifiersTy(ScheduleKind, M1, M2));
|
2015-05-12 16:35:28 +08:00
|
|
|
}
|
|
|
|
|
2015-04-16 12:54:05 +08:00
|
|
|
bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
|
2014-12-15 15:07:06 +08:00
|
|
|
// Emit the loop iteration variable.
|
|
|
|
auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
|
|
|
|
auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
|
|
|
|
EmitVarDecl(*IVDecl);
|
|
|
|
|
|
|
|
// Emit the iterations count variable.
|
|
|
|
// If it is not a variable, Sema decided to calculate iterations count on each
|
|
|
|
// iteration (e.g., it is foldable into a constant).
|
|
|
|
if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
|
|
|
|
EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
|
|
|
|
// Emit calculation of the iterations count.
|
|
|
|
EmitIgnoredExpr(S.getCalcLastIteration());
|
|
|
|
}
|
|
|
|
|
|
|
|
auto &RT = CGM.getOpenMPRuntime();
|
|
|
|
|
2015-04-16 12:54:05 +08:00
|
|
|
bool HasLastprivateClause;
|
2014-12-15 15:07:06 +08:00
|
|
|
// Check pre-condition.
|
|
|
|
{
|
|
|
|
// Skip the entire loop if we don't meet the precondition.
|
2015-04-22 19:59:37 +08:00
|
|
|
// If the condition constant folds and can be elided, avoid emitting the
|
|
|
|
// whole loop.
|
|
|
|
bool CondConstant;
|
|
|
|
llvm::BasicBlock *ContBlock = nullptr;
|
|
|
|
if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
|
|
|
|
if (!CondConstant)
|
|
|
|
return false;
|
|
|
|
} else {
|
|
|
|
auto *ThenBlock = createBasicBlock("omp.precond.then");
|
|
|
|
ContBlock = createBasicBlock("omp.precond.end");
|
|
|
|
emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
|
2015-04-24 07:06:47 +08:00
|
|
|
getProfileCount(&S));
|
2015-04-22 19:59:37 +08:00
|
|
|
EmitBlock(ThenBlock);
|
2015-04-24 07:06:47 +08:00
|
|
|
incrementProfileCounter(&S);
|
2015-04-22 19:59:37 +08:00
|
|
|
}
|
2015-06-18 12:45:29 +08:00
|
|
|
|
|
|
|
emitAlignedClause(*this, S);
|
2015-06-18 18:10:12 +08:00
|
|
|
EmitOMPLinearClauseInit(S);
|
2014-12-15 15:07:06 +08:00
|
|
|
// Emit 'then' code.
|
|
|
|
{
|
|
|
|
// Emit helper vars inits.
|
|
|
|
LValue LB =
|
|
|
|
EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
|
|
|
|
LValue UB =
|
|
|
|
EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
|
|
|
|
LValue ST =
|
|
|
|
EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
|
|
|
|
LValue IL =
|
|
|
|
EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
|
|
|
|
|
|
|
|
OMPPrivateScope LoopScope(*this);
|
2015-04-15 12:52:20 +08:00
|
|
|
if (EmitOMPFirstprivateClause(S, LoopScope)) {
|
|
|
|
// Emit implicit barrier to synchronize threads and avoid data races on
|
|
|
|
// initialization of firstprivate variables.
|
2015-09-15 20:52:43 +08:00
|
|
|
CGM.getOpenMPRuntime().emitBarrierCall(
|
|
|
|
*this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
|
|
|
|
/*ForceSimpleCall=*/true);
|
2015-04-15 12:52:20 +08:00
|
|
|
}
|
2015-04-22 20:24:45 +08:00
|
|
|
EmitOMPPrivateClause(S, LoopScope);
|
2015-04-16 12:54:05 +08:00
|
|
|
HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
|
[OPENMP] Codegen for 'reduction' clause in 'for' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
*(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
...
*(Type<n>-1*)lhs[<n>-1] =
ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
*(Type<n>-1*)rhs[<n>-1]);
}
...
void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
<LHSExprs>[0] = ReductionOperation0(*<LHSExprs>[0], *<RHSExprs>[0]);
...
<LHSExprs>[<n>-1] = ReductionOperation<n>-1(*<LHSExprs>[<n>-1], *<RHSExprs>[<n>-1]);
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
Atomic(<LHSExprs>[0] = ReductionOperation0(*<LHSExprs>[0], *<RHSExprs>[0]));
...
Atomic(<LHSExprs>[<n>-1] = ReductionOperation<n>-1(*<LHSExprs>[<n>-1], *<RHSExprs>[<n>-1]));
break;
default:;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D9139
llvm-svn: 235506
2015-04-22 21:43:03 +08:00
|
|
|
EmitOMPReductionClauseInit(S, LoopScope);
|
2015-08-06 20:30:57 +08:00
|
|
|
emitPrivateLoopCounters(*this, LoopScope, S.counters(),
|
|
|
|
S.private_counters());
|
2015-06-18 12:45:29 +08:00
|
|
|
emitPrivateLinearVars(*this, S, LoopScope);
|
2015-03-16 15:14:41 +08:00
|
|
|
(void)LoopScope.Privatize();
|
2014-12-15 15:07:06 +08:00
|
|
|
|
|
|
|
// Detect the loop schedule kind and chunk.
|
2015-05-12 16:35:28 +08:00
|
|
|
llvm::Value *Chunk;
|
|
|
|
OpenMPScheduleClauseKind ScheduleKind;
|
|
|
|
auto ScheduleInfo =
|
|
|
|
emitScheduleClause(*this, S, /*OuterRegion=*/false);
|
|
|
|
Chunk = ScheduleInfo.first;
|
2015-12-31 14:52:34 +08:00
|
|
|
ScheduleKind = ScheduleInfo.second.Kind;
|
|
|
|
const OpenMPScheduleClauseModifier M1 = ScheduleInfo.second.M1;
|
|
|
|
const OpenMPScheduleClauseModifier M2 = ScheduleInfo.second.M2;
|
2014-12-15 15:07:06 +08:00
|
|
|
const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
|
|
|
|
const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
|
2015-08-30 23:12:28 +08:00
|
|
|
const bool Ordered = S.getSingleClause<OMPOrderedClause>() != nullptr;
|
2015-12-31 14:52:34 +08:00
|
|
|
// OpenMP 4.5, 2.7.1 Loop Construct, Description.
|
|
|
|
// If the static schedule kind is specified or if the ordered clause is
|
|
|
|
// specified, and if no monotonic modifier is specified, the effect will
|
|
|
|
// be as if the monotonic modifier was specified.
|
2014-12-15 15:07:06 +08:00
|
|
|
if (RT.isStaticNonchunked(ScheduleKind,
|
2015-05-20 21:12:48 +08:00
|
|
|
/* Chunked */ Chunk != nullptr) &&
|
|
|
|
!Ordered) {
|
2015-12-31 14:52:34 +08:00
|
|
|
if (isOpenMPSimdDirective(S.getDirectiveKind()))
|
|
|
|
EmitOMPSimdInit(S, /*IsMonotonic=*/true);
|
2014-12-15 15:07:06 +08:00
|
|
|
// OpenMP [2.7.1, Loop Construct, Description, table 2-1]
|
|
|
|
// When no chunk_size is specified, the iteration space is divided into
|
|
|
|
// chunks that are approximately equal in size, and at most one chunk is
|
|
|
|
// distributed to each thread. Note that the size of the chunks is
|
|
|
|
// unspecified in this case.
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind,
|
|
|
|
IVSize, IVSigned, Ordered,
|
|
|
|
IL.getAddress(), LB.getAddress(),
|
|
|
|
UB.getAddress(), ST.getAddress());
|
2015-12-31 14:52:34 +08:00
|
|
|
auto LoopExit =
|
|
|
|
getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
|
2014-12-15 15:07:06 +08:00
|
|
|
// UB = min(UB, GlobalUB);
|
|
|
|
EmitIgnoredExpr(S.getEnsureUpperBound());
|
|
|
|
// IV = LB;
|
|
|
|
EmitIgnoredExpr(S.getInit());
|
|
|
|
// while (idx <= UB) { BODY; ++idx; }
|
2015-06-16 19:59:36 +08:00
|
|
|
EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
|
|
|
|
S.getInc(),
|
2015-07-02 12:17:07 +08:00
|
|
|
[&S, LoopExit](CodeGenFunction &CGF) {
|
|
|
|
CGF.EmitOMPLoopBody(S, LoopExit);
|
2015-04-10 12:50:10 +08:00
|
|
|
CGF.EmitStopPoint(&S);
|
2015-04-22 19:15:40 +08:00
|
|
|
},
|
|
|
|
[](CodeGenFunction &) {});
|
2015-07-02 12:17:07 +08:00
|
|
|
EmitBlock(LoopExit.getBlock());
|
2014-12-15 15:07:06 +08:00
|
|
|
// Tell the runtime we are done.
|
2015-04-22 19:15:40 +08:00
|
|
|
RT.emitForStaticFinish(*this, S.getLocStart());
|
2015-01-22 16:49:35 +08:00
|
|
|
} else {
|
2015-12-31 14:52:34 +08:00
|
|
|
const bool IsMonotonic = Ordered ||
|
|
|
|
ScheduleKind == OMPC_SCHEDULE_static ||
|
|
|
|
ScheduleKind == OMPC_SCHEDULE_unknown ||
|
|
|
|
M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
|
|
|
|
M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
|
2015-01-22 16:49:35 +08:00
|
|
|
// Emit the outer loop, which requests its work chunk [LB..UB] from
|
|
|
|
// runtime and runs the inner loop to process it.
|
2015-12-31 14:52:34 +08:00
|
|
|
EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
|
2015-05-20 21:12:48 +08:00
|
|
|
LB.getAddress(), UB.getAddress(), ST.getAddress(),
|
|
|
|
IL.getAddress(), Chunk);
|
2015-01-22 16:49:35 +08:00
|
|
|
}
|
[OPENMP] Codegen for 'reduction' clause in 'for' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
*(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
...
*(Type<n>-1*)lhs[<n>-1] =
ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
*(Type<n>-1*)rhs[<n>-1]);
}
...
void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
<LHSExprs>[0] = ReductionOperation0(*<LHSExprs>[0], *<RHSExprs>[0]);
...
<LHSExprs>[<n>-1] = ReductionOperation<n>-1(*<LHSExprs>[<n>-1], *<RHSExprs>[<n>-1]);
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
Atomic(<LHSExprs>[0] = ReductionOperation0(*<LHSExprs>[0], *<RHSExprs>[0]));
...
Atomic(<LHSExprs>[<n>-1] = ReductionOperation<n>-1(*<LHSExprs>[<n>-1], *<RHSExprs>[<n>-1]));
break;
default:;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D9139
llvm-svn: 235506
2015-04-22 21:43:03 +08:00
|
|
|
EmitOMPReductionClauseFinal(S);
|
2015-04-16 12:54:05 +08:00
|
|
|
// Emit final copy of the lastprivate variables if IsLastIter != 0.
|
|
|
|
if (HasLastprivateClause)
|
|
|
|
EmitOMPLastprivateClauseFinal(
|
|
|
|
S, Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
|
2014-12-15 15:07:06 +08:00
|
|
|
}
|
2015-06-18 12:45:29 +08:00
|
|
|
if (isOpenMPSimdDirective(S.getDirectiveKind())) {
|
|
|
|
EmitOMPSimdFinal(S);
|
|
|
|
}
|
2014-12-15 15:07:06 +08:00
|
|
|
// We're now done with the loop, so jump to the continuation block.
|
2015-04-22 19:59:37 +08:00
|
|
|
if (ContBlock) {
|
|
|
|
EmitBranch(ContBlock);
|
|
|
|
EmitBlock(ContBlock, true);
|
|
|
|
}
|
2014-12-15 15:07:06 +08:00
|
|
|
}
|
2015-04-16 12:54:05 +08:00
|
|
|
return HasLastprivateClause;
|
2014-12-15 15:07:06 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
|
2015-04-10 12:50:10 +08:00
|
|
|
LexicalScope Scope(*this, S.getSourceRange());
|
2015-04-16 12:54:05 +08:00
|
|
|
bool HasLastprivates = false;
|
|
|
|
auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) {
|
|
|
|
HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
|
|
|
|
};
|
2015-09-15 20:52:43 +08:00
|
|
|
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
|
|
|
|
S.hasCancel());
|
2014-12-15 15:07:06 +08:00
|
|
|
|
|
|
|
// Emit an implicit barrier at the end.
|
2015-08-30 23:12:28 +08:00
|
|
|
if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
|
2015-03-30 12:30:22 +08:00
|
|
|
CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
|
|
|
|
}
|
2014-06-18 12:14:57 +08:00
|
|
|
}
|
2014-06-25 19:44:49 +08:00
|
|
|
|
2015-06-17 15:45:51 +08:00
|
|
|
void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
|
|
|
|
LexicalScope Scope(*this, S.getSourceRange());
|
|
|
|
bool HasLastprivates = false;
|
|
|
|
auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) {
|
|
|
|
HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
|
|
|
|
};
|
2015-07-03 17:56:58 +08:00
|
|
|
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
|
2015-06-17 15:45:51 +08:00
|
|
|
|
|
|
|
// Emit an implicit barrier at the end.
|
2015-08-30 23:12:28 +08:00
|
|
|
if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
|
2015-06-17 15:45:51 +08:00
|
|
|
CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
|
|
|
|
}
|
2014-09-18 13:12:34 +08:00
|
|
|
}
|
|
|
|
|
[OPENMP] Initial codegen for 'omp sections' and 'omp section' directives.
If only one section is found in the sections region, it is emitted just like a single region.
Otherwise it is emitted as a static non-chunked loop.
#pragma omp sections
{
#pragma omp section
{1}
...
#pragma omp section
{n}
}
is translated to something like
i32 <iter_var>
i32 <last_iter> = 0
i32 <lower_bound> = 0
i32 <upper_bound> = n-1
i32 <stride> = 1
call void @__kmpc_for_static_init_4(<loc>, i32 <gtid>, i32 34/*static non-chunked*/, i32* <last_iter>, i32* <lower_bound>, i32* <upper_bound>, i32* <stride>, i32 1/*increment always 1*/, i32 1/*chunk always 1*/)
<upper_bound> = min(<upper_bound>, n-1)
<iter_var> = <lb>
check:
br <iter_var> <= <upper_bound>, label cont, label exit
continue:
switch (IV) {
case 0:
{1};
break;
...
case <NumSection> - 1:
{n};
break;
}
++<iter_var>
br label check
exit:
call void @__kmpc_for_static_fini(<loc>, i32 <gtid>)
Differential Revision: http://reviews.llvm.org/D8244
llvm-svn: 232021
2015-03-12 16:53:29 +08:00
|
|
|
/// Create a memory temporary of type \p Ty named \p Name and return it as an
/// lvalue. When \p Init is non-null it is stored into the temporary as its
/// scalar initializer; otherwise the temporary is left uninitialized.
static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
                                const Twine &Name,
                                llvm::Value *Init = nullptr) {
  auto Storage = CGF.CreateMemTemp(Ty, Name);
  auto Result = CGF.MakeAddrLValue(Storage, Ty);
  if (Init == nullptr)
    return Result;
  CGF.EmitScalarInit(Init, Result);
  return Result;
}
|
|
|
|
|
2015-07-02 12:17:07 +08:00
|
|
|
/// Emit the body of a 'sections'-like directive as a statically scheduled
/// loop over section indices whose body is a switch dispatching to the
/// individual sections.
///
/// When the captured statement is a compound statement each of its children
/// becomes one case; otherwise the captured statement itself is the single
/// case 0. A barrier is emitted here only when lastprivates are present
/// together with 'nowait'.
///
/// \returns OMPD_sections (the kind the region was emitted as).
OpenMPDirectiveKind
CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  auto *Captured =
      cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
  auto *Sections = dyn_cast<CompoundStmt>(Captured);
  bool HasLastprivates = false;

  auto EmitRegion = [&S, Captured, Sections,
                     &HasLastprivates](CodeGenFunction &CGF) {
    auto &Ctx = CGF.CGM.getContext();
    auto Int32Ty = Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);

    // Helper variables: lower bound, upper bound, stride, is-last flag.
    LValue LB = createSectionLVal(CGF, Int32Ty, ".omp.sections.lb.",
                                  CGF.Builder.getInt32(0));
    // Index of the last section (0 when there is just one statement).
    auto *GlobalUBVal =
        CGF.Builder.getInt32(Sections ? Sections->size() - 1 : 0);
    LValue UB =
        createSectionLVal(CGF, Int32Ty, ".omp.sections.ub.", GlobalUBVal);
    LValue ST = createSectionLVal(CGF, Int32Ty, ".omp.sections.st.",
                                  CGF.Builder.getInt32(1));
    LValue IL = createSectionLVal(CGF, Int32Ty, ".omp.sections.il.",
                                  CGF.Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, Int32Ty, ".omp.sections.iv.");

    // Opaque expressions standing for IV and UB so that the loop condition
    // and increment can be expressed as AST nodes.
    OpaqueValueExpr IVRefExpr(S.getLocStart(), Int32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getLocStart(), Int32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Loop condition: IV <= UB.
    BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, Ctx.BoolTy, VK_RValue,
                        OK_Ordinary, S.getLocStart(),
                        /*fpContractable=*/false);
    // Loop increment: ++IV.
    UnaryOperator Inc(&IVRefExpr, UO_PreInc, Int32Ty, VK_RValue, OK_Ordinary,
                      S.getLocStart());

    auto EmitSwitchBody = [Captured, Sections, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      // ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      // }
      // .omp.sections.exit:
      auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      auto *IVVal = CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal();
      const unsigned NumCases = Sections ? Sections->size() : 1;
      auto *Switch = CGF.Builder.CreateSwitch(IVVal, ExitBB, NumCases);

      // Emits one 'case Index:' block running Section and branching to the
      // exit block.
      auto EmitCase = [&CGF, Switch, ExitBB](unsigned Index,
                                             const Stmt *Section) {
        auto *CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        Switch->addCase(CGF.Builder.getInt32(Index), CaseBB);
        CGF.EmitStmt(Section);
        CGF.EmitBranch(ExitBB);
      };

      if (Sections) {
        unsigned Index = 0;
        for (auto *Child : Sections->children())
          EmitCase(Index++, Child);
      } else {
        // The captured statement is not a compound statement: it is the only
        // section.
        EmitCase(0, Captured);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    auto &Runtime = CGF.CGM.getOpenMPRuntime();
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables.
      Runtime.emitBarrierCall(CGF, S.getLocStart(), OMPD_unknown,
                              /*EmitChecks=*/false,
                              /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();

    // Emit static non-chunked loop.
    Runtime.emitForStaticInit(CGF, S.getLocStart(), OMPC_SCHEDULE_static,
                              /*IVSize=*/32, /*IVSigned=*/true,
                              /*Ordered=*/false, IL.getAddress(),
                              LB.getAddress(), UB.getAddress(),
                              ST.getAddress());
    // UB = min(UB, GlobalUB);
    auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
    auto *UBIsSmaller = CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal);
    auto *ClampedUB =
        CGF.Builder.CreateSelect(UBIsSmaller, UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(ClampedUB, UB);
    // IV = LB;
    auto *LBVal = CGF.EmitLoadOfScalar(LB, S.getLocStart());
    CGF.EmitStoreOfScalar(LBVal, IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc,
                         EmitSwitchBody, [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    Runtime.emitForStaticFinish(CGF, S.getLocStart());
    CGF.EmitOMPReductionClauseFinal(S);

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates) {
      auto *ILVal = CGF.EmitLoadOfScalar(IL, S.getLocStart());
      CGF.EmitOMPLastprivateClauseFinal(S,
                                        CGF.Builder.CreateIsNotNull(ILVal));
    }
  };

  // Only the 'sections' and 'parallel sections' directives carry a cancel
  // flag; for any other directive kind it stays false.
  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, EmitRegion,
                                              HasCancel);

  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause. Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // With 'nowait' the directive emits no barrier of its own, so emit one
    // here after the final copy of the lastprivate variables.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                           OMPD_unknown);
  }
  return OMPD_sections;
}
|
[OPENMP] Initial codegen for 'omp sections' and 'omp section' directives.
If only one section is found in the sections region, it is emitted just like a single region.
Otherwise it is emitted as a static non-chunked loop.
#pragma omp sections
{
#pragma omp section
{1}
...
#pragma omp section
{n}
}
is translated to something like
i32 <iter_var>
i32 <last_iter> = 0
i32 <lower_bound> = 0
i32 <upper_bound> = n-1
i32 <stride> = 1
call void @__kmpc_for_static_init_4(<loc>, i32 <gtid>, i32 34/*static non-chunked*/, i32* <last_iter>, i32* <lower_bound>, i32* <upper_bound>, i32* <stride>, i32 1/*increment always 1*/, i32 1/*chunk always 1*/)
<upper_bound> = min(<upper_bound>, n-1)
<iter_var> = <lb>
check:
br <iter_var> <= <upper_bound>, label cont, label exit
continue:
switch (IV) {
case 0:
{1};
break;
...
case <NumSection> - 1:
{n};
break;
}
++<iter_var>
br label check
exit:
call void @__kmpc_for_static_fini(<loc>, i32 <gtid>)
Differential Revision: http://reviews.llvm.org/D8244
llvm-svn: 232021
2015-03-12 16:53:29 +08:00
|
|
|
|
2015-04-14 11:29:22 +08:00
|
|
|
void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
|
|
|
|
LexicalScope Scope(*this, S.getSourceRange());
|
2015-07-02 12:17:07 +08:00
|
|
|
OpenMPDirectiveKind EmittedAs = EmitSections(S);
|
[OPENMP] Initial codegen for 'omp sections' and 'omp section' directives.
If only one section is found in the sections region, it is emitted just like single region.
Otherwise it is emitted as a static non-chunked loop.
#pragma omp sections
{
#pragma omp section
{1}
...
#pragma omp section
{n}
}
is translated to something like
i32 <iter_var>
i32 <last_iter> = 0
i32 <lower_bound> = 0
i32 <upper_bound> = n-1
i32 <stride> = 1
call void @__kmpc_for_static_init_4(<loc>, i32 <gtid>, i32 34/*static non-chunked*/, i32* <last_iter>, i32* <lower_bound>, i32* <upper_bound>, i32* <stride>, i32 1/*increment always 1*/, i32 1/*chunk always 1*/)
<upper_bound> = min(<upper_bound>, n-1)
<iter_var> = <lb>
check:
br <iter_var> <= <upper_bound>, label cont, label exit
continue:
switch (IV) {
case 0:
{1};
break;
...
case <NumSection> - 1:
{n};
break;
}
++<iter_var>
br label check
exit:
call void @__kmpc_for_static_fini(<loc>, i32 <gtid>)
Differential Revision: http://reviews.llvm.org/D8244
llvm-svn: 232021
2015-03-12 16:53:29 +08:00
|
|
|
// Emit an implicit barrier at the end.
|
2015-08-30 23:12:28 +08:00
|
|
|
if (!S.getSingleClause<OMPNowaitClause>()) {
|
2015-04-14 11:29:22 +08:00
|
|
|
CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), EmittedAs);
|
2015-03-30 12:30:22 +08:00
|
|
|
}
|
2014-06-25 19:44:49 +08:00
|
|
|
}
|
|
|
|
|
[OPENMP] Initial codegen for 'omp sections' and 'omp section' directives.
If only one section is found in the sections region, it is emitted just like a single region.
Otherwise it is emitted as a static non-chunked loop.
#pragma omp sections
{
#pragma omp section
{1}
...
#pragma omp section
{n}
}
is translated to something like
i32 <iter_var>
i32 <last_iter> = 0
i32 <lower_bound> = 0
i32 <upper_bound> = n-1
i32 <stride> = 1
call void @__kmpc_for_static_init_4(<loc>, i32 <gtid>, i32 34/*static non-chunked*/, i32* <last_iter>, i32* <lower_bound>, i32* <upper_bound>, i32* <stride>, i32 1/*increment always 1*/, i32 1/*chunk always 1*/)
<upper_bound> = min(<upper_bound>, n-1)
<iter_var> = <lb>
check:
br <iter_var> <= <upper_bound>, label cont, label exit
continue:
switch (IV) {
case 0:
{1};
break;
...
case <NumSection> - 1:
{n};
break;
}
++<iter_var>
br label check
exit:
call void @__kmpc_for_static_fini(<loc>, i32 <gtid>)
Differential Revision: http://reviews.llvm.org/D8244
llvm-svn: 232021
2015-03-12 16:53:29 +08:00
|
|
|
void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
|
2015-04-10 12:50:10 +08:00
|
|
|
LexicalScope Scope(*this, S.getSourceRange());
|
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
|
|
|
|
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
|
|
|
|
};
|
2015-09-15 20:52:43 +08:00
|
|
|
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
|
|
|
|
S.hasCancel());
|
2014-06-26 16:21:58 +08:00
|
|
|
}
|
|
|
|
|
2015-02-05 14:35:41 +08:00
|
|
|
void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
|
2015-03-23 14:18:07 +08:00
|
|
|
llvm::SmallVector<const Expr *, 8> CopyprivateVars;
|
2015-04-14 13:11:24 +08:00
|
|
|
llvm::SmallVector<const Expr *, 8> DestExprs;
|
2015-03-23 14:18:07 +08:00
|
|
|
llvm::SmallVector<const Expr *, 8> SrcExprs;
|
|
|
|
llvm::SmallVector<const Expr *, 8> AssignmentOps;
|
2015-04-10 12:50:10 +08:00
|
|
|
// Check if there are any 'copyprivate' clauses associated with this
|
|
|
|
// 'single'
|
2015-03-23 14:18:07 +08:00
|
|
|
// construct.
|
|
|
|
// Build a list of copyprivate variables along with helper expressions
|
|
|
|
// (<source>, <destination>, <destination>=<source> expressions)
|
2015-08-30 23:12:28 +08:00
|
|
|
for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
|
2015-03-23 14:18:07 +08:00
|
|
|
CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
|
2015-04-14 13:11:24 +08:00
|
|
|
DestExprs.append(C->destination_exprs().begin(),
|
|
|
|
C->destination_exprs().end());
|
2015-03-23 14:18:07 +08:00
|
|
|
SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
|
|
|
|
AssignmentOps.append(C->assignment_ops().begin(),
|
|
|
|
C->assignment_ops().end());
|
|
|
|
}
|
2015-04-10 12:50:10 +08:00
|
|
|
LexicalScope Scope(*this, S.getSourceRange());
|
2015-03-23 14:18:07 +08:00
|
|
|
// Emit code for 'single' region along with 'copyprivate' clauses
|
2015-04-24 12:21:15 +08:00
|
|
|
bool HasFirstprivates;
|
|
|
|
auto &&CodeGen = [&S, &HasFirstprivates](CodeGenFunction &CGF) {
|
|
|
|
CodeGenFunction::OMPPrivateScope SingleScope(CGF);
|
|
|
|
HasFirstprivates = CGF.EmitOMPFirstprivateClause(S, SingleScope);
|
2015-04-27 11:48:52 +08:00
|
|
|
CGF.EmitOMPPrivateClause(S, SingleScope);
|
2015-04-24 12:21:15 +08:00
|
|
|
(void)SingleScope.Privatize();
|
|
|
|
|
2015-04-10 12:50:10 +08:00
|
|
|
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
|
|
|
|
};
|
|
|
|
CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
|
2015-04-14 13:11:24 +08:00
|
|
|
CopyprivateVars, DestExprs, SrcExprs,
|
2015-04-10 12:50:10 +08:00
|
|
|
AssignmentOps);
|
2015-04-24 12:21:15 +08:00
|
|
|
// Emit an implicit barrier at the end (to avoid data race on firstprivate
|
|
|
|
// init or if no 'nowait' clause was specified and no 'copyprivate' clause).
|
2015-08-30 23:12:28 +08:00
|
|
|
if ((!S.getSingleClause<OMPNowaitClause>() || HasFirstprivates) &&
|
2015-04-24 12:21:15 +08:00
|
|
|
CopyprivateVars.empty()) {
|
|
|
|
CGM.getOpenMPRuntime().emitBarrierCall(
|
|
|
|
*this, S.getLocStart(),
|
2015-08-30 23:12:28 +08:00
|
|
|
S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
|
2015-03-30 12:30:22 +08:00
|
|
|
}
|
2014-06-26 20:05:45 +08:00
|
|
|
}
|
|
|
|
|
2014-12-04 15:23:53 +08:00
|
|
|
void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
|
2015-04-10 12:50:10 +08:00
|
|
|
LexicalScope Scope(*this, S.getSourceRange());
|
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
|
|
|
|
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
|
|
|
|
};
|
|
|
|
CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
|
2014-07-17 16:54:58 +08:00
|
|
|
}
|
|
|
|
|
2014-09-22 18:01:53 +08:00
|
|
|
void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
|
2015-04-10 12:50:10 +08:00
|
|
|
LexicalScope Scope(*this, S.getSourceRange());
|
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
|
|
|
|
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
|
|
|
|
};
|
2015-12-15 18:55:09 +08:00
|
|
|
Expr *Hint = nullptr;
|
|
|
|
if (auto *HintClause = S.getSingleClause<OMPHintClause>())
|
|
|
|
Hint = HintClause->getHint();
|
|
|
|
CGM.getOpenMPRuntime().emitCriticalRegion(*this,
|
|
|
|
S.getDirectiveName().getAsString(),
|
|
|
|
CodeGen, S.getLocStart(), Hint);
|
2014-07-21 17:42:05 +08:00
|
|
|
}
|
|
|
|
|
2015-04-13 13:28:11 +08:00
|
|
|
void CodeGenFunction::EmitOMPParallelForDirective(
|
|
|
|
const OMPParallelForDirective &S) {
|
|
|
|
// Emit directive as a combined directive that consists of two implicit
|
|
|
|
// directives: 'parallel' with 'for' directive.
|
|
|
|
LexicalScope Scope(*this, S.getSourceRange());
|
2015-05-12 16:35:28 +08:00
|
|
|
(void)emitScheduleClause(*this, S, /*OuterRegion=*/true);
|
2015-04-13 13:28:11 +08:00
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
|
|
|
|
CGF.EmitOMPWorksharingLoop(S);
|
|
|
|
};
|
2015-07-03 17:56:58 +08:00
|
|
|
emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen);
|
2014-07-07 21:01:15 +08:00
|
|
|
}
|
|
|
|
|
2014-09-23 17:33:00 +08:00
|
|
|
void CodeGenFunction::EmitOMPParallelForSimdDirective(
|
2015-06-18 18:10:12 +08:00
|
|
|
const OMPParallelForSimdDirective &S) {
|
|
|
|
// Emit directive as a combined directive that consists of two implicit
|
|
|
|
// directives: 'parallel' with 'for' directive.
|
|
|
|
LexicalScope Scope(*this, S.getSourceRange());
|
|
|
|
(void)emitScheduleClause(*this, S, /*OuterRegion=*/true);
|
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
|
|
|
|
CGF.EmitOMPWorksharingLoop(S);
|
|
|
|
};
|
2015-07-03 17:56:58 +08:00
|
|
|
emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen);
|
2014-09-23 17:33:00 +08:00
|
|
|
}
|
|
|
|
|
2014-07-08 16:12:03 +08:00
|
|
|
void CodeGenFunction::EmitOMPParallelSectionsDirective(
|
2015-04-14 11:29:22 +08:00
|
|
|
const OMPParallelSectionsDirective &S) {
|
|
|
|
// Emit directive as a combined directive that consists of two implicit
|
|
|
|
// directives: 'parallel' with 'sections' directive.
|
|
|
|
LexicalScope Scope(*this, S.getSourceRange());
|
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
|
2015-07-02 12:17:07 +08:00
|
|
|
(void)CGF.EmitSections(S);
|
2015-04-14 11:29:22 +08:00
|
|
|
};
|
2015-07-03 17:56:58 +08:00
|
|
|
emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen);
|
2014-07-08 16:12:03 +08:00
|
|
|
}
|
|
|
|
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
|
|
|
|
// Emit outlined function for task construct.
|
2015-04-10 12:50:10 +08:00
|
|
|
LexicalScope Scope(*this, S.getSourceRange());
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
|
|
|
|
auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
|
|
|
|
auto *I = CS->getCapturedDecl()->param_begin();
|
2015-04-10 12:50:10 +08:00
|
|
|
auto *PartId = std::next(I);
|
[OPENMP] Initial codegen for 'omp task' directive.
The task region is emitted in several steps:
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry).
Here task_entry is a pointer to the function:
kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
TaskFunction(gtid, tt->part_id, tt->shareds);
return 0;
}
Copy a list of shared variables to field shareds of the resulting structure kmp_task_t returned by the previous call (if any).
Copy a pointer to destructions function to field destructions of the resulting structure kmp_task_t.
Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task), where new_task is a resulting structure from previous items.
Differential Revision: http://reviews.llvm.org/D7560
llvm-svn: 231762
2015-03-10 15:28:44 +08:00
|
|
|
// The first function argument for tasks is a thread id, the second one is a
|
|
|
|
// part id (0 for tied tasks, >=0 for untied task).
|
[OPENMP] Codegen for 'firstprivate' clause in 'task' directive.
For tasks, codegen for private/firstprivate variables differs from that of other directives.
1. Build an internal structure of privates for each private variable:
struct .kmp_privates_t. {
Ty1 var1;
...
Tyn varn;
};
2. Add a new field to kmp_task_t type with list of privates.
struct kmp_task_t {
void * shareds;
kmp_routine_entry_t routine;
kmp_int32 part_id;
kmp_routine_entry_t destructors;
.kmp_privates_t. privates;
};
3. Create a function with destructors calls for all privates after end of task region.
kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
~Destructor(&tt->privates.var1);
...
~Destructor(&tt->privates.varn);
return 0;
}
4. Perform initialization of all firstprivate fields (by simple copying for POD data, copy constructor calls for classes) + provide address of a destructor function after kmpc_omp_task_alloc() and before kmpc_omp_task() calls.
kmp_task_t *new_task = __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry);
CopyConstructor(new_task->privates.var1, *new_task->shareds.var1_ref);
new_task->shareds.var1_ref = &new_task->privates.var1;
...
CopyConstructor(new_task->privates.varn, *new_task->shareds.varn_ref);
new_task->shareds.varn_ref = &new_task->privates.varn;
new_task->destructors = .omp_task_destructor.;
kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task)
Differential Revision: http://reviews.llvm.org/D9370
llvm-svn: 236479
2015-05-05 12:05:12 +08:00
|
|
|
llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
|
2015-04-30 14:51:57 +08:00
|
|
|
// Get list of private variables.
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
llvm::SmallVector<const Expr *, 8> PrivateVars;
|
2015-04-30 14:51:57 +08:00
|
|
|
llvm::SmallVector<const Expr *, 8> PrivateCopies;
|
2015-08-30 23:12:28 +08:00
|
|
|
for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
|
2015-04-30 14:51:57 +08:00
|
|
|
auto IRef = C->varlist_begin();
|
|
|
|
for (auto *IInit : C->private_copies()) {
|
|
|
|
auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
|
|
|
|
if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
PrivateVars.push_back(*IRef);
|
2015-04-30 14:51:57 +08:00
|
|
|
PrivateCopies.push_back(IInit);
|
|
|
|
}
|
|
|
|
++IRef;
|
|
|
|
}
|
|
|
|
}
|
[OPENMP] Codegen for 'firstprivate' clause in 'task' directive.
For tasks, codegen for private/firstprivate variables differs from that of other directives.
1. Build an internal structure of privates for each private variable:
struct .kmp_privates_t. {
Ty1 var1;
...
Tyn varn;
};
2. Add a new field to kmp_task_t type with list of privates.
struct kmp_task_t {
void * shareds;
kmp_routine_entry_t routine;
kmp_int32 part_id;
kmp_routine_entry_t destructors;
.kmp_privates_t. privates;
};
3. Create a function with destructors calls for all privates after end of task region.
kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
~Destructor(&tt->privates.var1);
...
~Destructor(&tt->privates.varn);
return 0;
}
4. Perform initialization of all firstprivate fields (by simple copying for POD data, copy constructor calls for classes) + provide address of a destructor function after kmpc_omp_task_alloc() and before kmpc_omp_task() calls.
kmp_task_t *new_task = __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry);
CopyConstructor(new_task->privates.var1, *new_task->shareds.var1_ref);
new_task->shareds.var1_ref = &new_task->privates.var1;
...
CopyConstructor(new_task->privates.varn, *new_task->shareds.varn_ref);
new_task->shareds.varn_ref = &new_task->privates.varn;
new_task->destructors = .omp_task_destructor.;
kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task)
Differential Revision: http://reviews.llvm.org/D9370
llvm-svn: 236479
2015-05-05 12:05:12 +08:00
|
|
|
EmittedAsPrivate.clear();
|
|
|
|
// Get list of firstprivate variables.
|
|
|
|
llvm::SmallVector<const Expr *, 8> FirstprivateVars;
|
|
|
|
llvm::SmallVector<const Expr *, 8> FirstprivateCopies;
|
|
|
|
llvm::SmallVector<const Expr *, 8> FirstprivateInits;
|
2015-08-30 23:12:28 +08:00
|
|
|
for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
|
[OPENMP] Codegen for 'firstprivate' clause in 'task' directive.
For tasks, codegen for private/firstprivate variables differs from that of other directives.
1. Build an internal structure of privates for each private variable:
struct .kmp_privates_t. {
Ty1 var1;
...
Tyn varn;
};
2. Add a new field to kmp_task_t type with list of privates.
struct kmp_task_t {
void * shareds;
kmp_routine_entry_t routine;
kmp_int32 part_id;
kmp_routine_entry_t destructors;
.kmp_privates_t. privates;
};
3. Create a function with destructors calls for all privates after end of task region.
kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
~Destructor(&tt->privates.var1);
...
~Destructor(&tt->privates.varn);
return 0;
}
4. Perform initialization of all firstprivate fields (by simple copying for POD data, copy constructor calls for classes) + provide address of a destructor function after kmpc_omp_task_alloc() and before kmpc_omp_task() calls.
kmp_task_t *new_task = __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry);
CopyConstructor(new_task->privates.var1, *new_task->shareds.var1_ref);
new_task->shareds.var1_ref = &new_task->privates.var1;
...
CopyConstructor(new_task->privates.varn, *new_task->shareds.varn_ref);
new_task->shareds.varn_ref = &new_task->privates.varn;
new_task->destructors = .omp_task_destructor.;
kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task)
Differential Revision: http://reviews.llvm.org/D9370
llvm-svn: 236479
2015-05-05 12:05:12 +08:00
|
|
|
auto IRef = C->varlist_begin();
|
|
|
|
auto IElemInitRef = C->inits().begin();
|
|
|
|
for (auto *IInit : C->private_copies()) {
|
|
|
|
auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
|
|
|
|
if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
|
|
|
|
FirstprivateVars.push_back(*IRef);
|
|
|
|
FirstprivateCopies.push_back(IInit);
|
|
|
|
FirstprivateInits.push_back(*IElemInitRef);
|
|
|
|
}
|
|
|
|
++IRef, ++IElemInitRef;
|
|
|
|
}
|
|
|
|
}
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If task directive has associated 'depend' clause then function kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If this directive has associated 'if' clause then also before a call of kmpc_omp_task_begin_if0() a function void __kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called.
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
// Build list of dependences.
|
|
|
|
llvm::SmallVector<std::pair<OpenMPDependClauseKind, const Expr *>, 8>
|
|
|
|
Dependences;
|
2015-08-30 23:12:28 +08:00
|
|
|
for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If task directive has associated 'depend' clause then function kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If this directive has associated 'if' clause then also before a call of kmpc_omp_task_begin_if0() a function void __kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called.
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
for (auto *IRef : C->varlists()) {
|
|
|
|
Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
|
|
|
|
}
|
|
|
|
}
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
auto &&CodeGen = [PartId, &S, &PrivateVars, &FirstprivateVars](
|
|
|
|
CodeGenFunction &CGF) {
|
|
|
|
// Set proper addresses for generated private copies.
|
|
|
|
auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
|
|
|
|
OMPPrivateScope Scope(CGF);
|
|
|
|
if (!PrivateVars.empty() || !FirstprivateVars.empty()) {
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
auto *CopyFn = CGF.Builder.CreateLoad(
|
|
|
|
CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
|
|
|
|
auto *PrivatesPtr = CGF.Builder.CreateLoad(
|
|
|
|
CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
// Map privates.
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
llvm::SmallVector<std::pair<const VarDecl *, Address>, 16>
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
PrivatePtrs;
|
|
|
|
llvm::SmallVector<llvm::Value *, 16> CallArgs;
|
|
|
|
CallArgs.push_back(PrivatesPtr);
|
|
|
|
for (auto *E : PrivateVars) {
|
|
|
|
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address PrivatePtr =
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()));
|
|
|
|
PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CallArgs.push_back(PrivatePtr.getPointer());
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
}
|
|
|
|
for (auto *E : FirstprivateVars) {
|
|
|
|
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address PrivatePtr =
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()));
|
|
|
|
PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
CallArgs.push_back(PrivatePtr.getPointer());
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
}
|
|
|
|
CGF.EmitRuntimeCall(CopyFn, CallArgs);
|
|
|
|
for (auto &&Pair : PrivatePtrs) {
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Address Replacement(CGF.Builder.CreateLoad(Pair.second),
|
|
|
|
CGF.getContext().getDeclAlign(Pair.first));
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
|
|
|
|
}
|
|
|
|
}
|
|
|
|
(void)Scope.Privatize();
|
|
|
|
if (*PartId) {
|
|
|
|
// TODO: emit code for untied tasks.
|
|
|
|
}
|
|
|
|
CGF.EmitStmt(CS->getCapturedStmt());
|
|
|
|
};
|
2015-07-03 17:56:58 +08:00
|
|
|
auto OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
|
|
|
|
S, *I, OMPD_task, CodeGen);
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
// Check if we should emit tied or untied task.
|
2015-08-30 23:12:28 +08:00
|
|
|
bool Tied = !S.getSingleClause<OMPUntiedClause>();
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
// Check if the task is final
|
|
|
|
llvm::PointerIntPair<llvm::Value *, 1, bool> Final;
|
2015-08-30 23:12:28 +08:00
|
|
|
if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
// If the condition constant folds and can be elided, try to avoid emitting
|
|
|
|
// the condition and the dead arm of the if/else.
|
2015-08-30 23:12:28 +08:00
|
|
|
auto *Cond = Clause->getCondition();
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
bool CondConstant;
|
|
|
|
if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
|
|
|
|
Final.setInt(CondConstant);
|
|
|
|
else
|
|
|
|
Final.setPointer(EvaluateExprAsBool(Cond));
|
|
|
|
} else {
|
|
|
|
// By default the task is not final.
|
|
|
|
Final.setInt(/*IntVal=*/false);
|
|
|
|
}
|
|
|
|
auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
|
|
|
|
const Expr *IfCond = nullptr;
|
2015-09-03 16:45:56 +08:00
|
|
|
for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
|
|
|
|
if (C->getNameModifier() == OMPD_unknown ||
|
|
|
|
C->getNameModifier() == OMPD_task) {
|
|
|
|
IfCond = C->getCondition();
|
|
|
|
break;
|
|
|
|
}
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
}
|
[OPENMP] Codegen for 'firstprivate' clause in 'task' directive.
For tasks, codegen for private/firstprivate variables differs from that used for other directives.
1. Build an internal structure of privates for each private variable:
struct .kmp_privates_t. {
Ty1 var1;
...
Tyn varn;
};
2. Add a new field to kmp_task_t type with list of privates.
struct kmp_task_t {
void * shareds;
kmp_routine_entry_t routine;
kmp_int32 part_id;
kmp_routine_entry_t destructors;
.kmp_privates_t. privates;
};
3. Create a function with destructor calls for all privates after the end of the task region.
kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
~Destructor(&tt->privates.var1);
...
~Destructor(&tt->privates.varn);
return 0;
}
4. Perform initialization of all firstprivate fields (by simple copying for POD data, copy constructor calls for classes) + provide address of a destructor function after kmpc_omp_task_alloc() and before kmpc_omp_task() calls.
kmp_task_t *new_task = __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry);
CopyConstructor(new_task->privates.var1, *new_task->shareds.var1_ref);
new_task->shareds.var1_ref = &new_task->privates.var1;
...
CopyConstructor(new_task->privates.varn, *new_task->shareds.varn_ref);
new_task->shareds.varn_ref = &new_task->privates.varn;
new_task->destructors = .omp_task_destructor.;
kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task)
Differential Revision: http://reviews.llvm.org/D9370
llvm-svn: 236479
2015-05-05 12:05:12 +08:00
|
|
|
CGM.getOpenMPRuntime().emitTaskCall(
|
|
|
|
*this, S.getLocStart(), S, Tied, Final, OutlinedFn, SharedsTy,
|
[OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt.
Reworked codegen for privates in tasks:
call @kmpc_omp_task_alloc();
...
call @kmpc_omp_task(task_proxy);
void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) {
*priv1_ref = &privs->private1;
...
*privn_ref = &privs->privaten;
ret void
}
i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) {
type1 **priv1;
...
typen **privn;
call map_privates(privs, priv1, ..., privn);
<Task body with priv1, .., privn instead of the captured variables>.
ret i32
}
i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) {
call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds);
}
llvm-svn: 238010
2015-05-22 16:56:35 +08:00
|
|
|
CapturedStruct, IfCond, PrivateVars, PrivateCopies, FirstprivateVars,
|
[OPENMP] Codegen for 'depend' clause (OpenMP 4.0).
If task directive has associated 'depend' clause then function kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called instead of __kmpc_omp_task().
If this directive has associated 'if' clause then also before a call of kmpc_omp_task_begin_if0() a function void __kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) must be called.
Array sections are not supported yet.
llvm-svn: 240532
2015-06-24 19:01:36 +08:00
|
|
|
FirstprivateCopies, FirstprivateInits, Dependences);
|
2014-07-11 19:25:16 +08:00
|
|
|
}
|
|
|
|
|
2015-02-05 13:57:51 +08:00
|
|
|
void CodeGenFunction::EmitOMPTaskyieldDirective(
|
|
|
|
const OMPTaskyieldDirective &S) {
|
2015-02-25 16:32:46 +08:00
|
|
|
CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
|
2014-07-18 15:47:19 +08:00
|
|
|
}
|
|
|
|
|
2014-12-05 12:09:23 +08:00
|
|
|
void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
|
2015-03-30 12:30:22 +08:00
|
|
|
CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
|
2014-07-18 17:11:51 +08:00
|
|
|
}
|
|
|
|
|
2015-04-27 13:22:09 +08:00
|
|
|
/// Emit code for an OMPTaskwaitDirective by delegating to the OpenMP runtime
/// helper, passing the directive's start location.
void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
  auto &Runtime = CGM.getOpenMPRuntime();
  Runtime.emitTaskwaitCall(*this, S.getLocStart());
}
|
|
|
|
|
2015-06-18 20:14:09 +08:00
|
|
|
void CodeGenFunction::EmitOMPTaskgroupDirective(
|
|
|
|
const OMPTaskgroupDirective &S) {
|
|
|
|
LexicalScope Scope(*this, S.getSourceRange());
|
|
|
|
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
|
|
|
|
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
|
|
|
|
};
|
|
|
|
CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
|
|
|
|
}
|
|
|
|
|
2014-11-20 12:34:54 +08:00
|
|
|
void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
|
2015-02-25 16:32:46 +08:00
|
|
|
CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
|
2015-08-30 23:12:28 +08:00
|
|
|
if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
|
2015-02-25 16:32:46 +08:00
|
|
|
return llvm::makeArrayRef(FlushClause->varlist_begin(),
|
|
|
|
FlushClause->varlist_end());
|
|
|
|
}
|
|
|
|
return llvm::None;
|
|
|
|
}(), S.getLocStart());
|
2014-07-21 19:26:11 +08:00
|
|
|
}
|
|
|
|
|
2015-12-14 22:51:25 +08:00
|
|
|
void CodeGenFunction::EmitOMPDistributeDirective(
|
|
|
|
const OMPDistributeDirective &S) {
|
|
|
|
llvm_unreachable("CodeGen for 'omp distribute' is not supported yet.");
|
|
|
|
}
|
|
|
|
|
2015-09-29 11:48:57 +08:00
|
|
|
/// Outline the captured statement \p S into its own function.
///
/// A fresh CodeGenFunction (with a new context suppressed) generates the
/// function via GenerateOpenMPCapturedStmtFunction; the result is marked
/// NoInline before being returned.
static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
                                                   const CapturedStmt *S) {
  CodeGenFunction OutlinedCGF(CGM, /*suppressNewContext=*/true);
  // The captured-statement info must outlive the generation call below.
  CodeGenFunction::CGCapturedStmtInfo CapturedInfo;
  OutlinedCGF.CapturedStmtInfo = &CapturedInfo;
  llvm::Function *OutlinedFn =
      OutlinedCGF.GenerateOpenMPCapturedStmtFunction(*S);
  OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
  return OutlinedFn;
}
|
|
|
|
|
2015-04-22 19:15:40 +08:00
|
|
|
void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
|
2015-12-18 15:58:25 +08:00
|
|
|
if (!S.getAssociatedStmt())
|
|
|
|
return;
|
2015-04-22 19:15:40 +08:00
|
|
|
LexicalScope Scope(*this, S.getSourceRange());
|
2015-09-29 11:48:57 +08:00
|
|
|
auto *C = S.getSingleClause<OMPSIMDClause>();
|
|
|
|
auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF) {
|
|
|
|
if (C) {
|
|
|
|
auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
|
|
|
|
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
|
|
|
|
CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
|
|
|
|
auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
|
|
|
|
CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars);
|
|
|
|
} else {
|
|
|
|
CGF.EmitStmt(
|
|
|
|
cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
|
|
|
|
}
|
2015-04-22 19:15:40 +08:00
|
|
|
};
|
2015-09-29 11:48:57 +08:00
|
|
|
CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
|
2014-07-22 14:45:04 +08:00
|
|
|
}
|
|
|
|
|
2015-01-22 14:17:56 +08:00
|
|
|
/// Convert \p Val (of type \p SrcType) to a scalar of type \p DestType.
///
/// \p DestType must have scalar evaluation kind and \p Val must not be an
/// aggregate. Scalar inputs go through EmitScalarConversion; anything else is
/// treated as complex and goes through EmitComplexToScalarConversion.
static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
                                         QualType SrcType, QualType DestType,
                                         SourceLocation Loc) {
  assert(CGF.hasScalarEvaluationKind(DestType) &&
         "DestType must have scalar evaluation kind.");
  assert(!Val.isAggregate() && "Must be a scalar or complex.");
  if (Val.isScalar())
    return CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
                                    Loc);
  return CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
                                           DestType, Loc);
}
|
|
|
|
|
|
|
|
/// Convert \p Val (of type \p SrcType) to a complex pair of type \p DestType.
///
/// \p DestType must have complex evaluation kind. A scalar input is converted
/// to the destination element type and paired with a null value of the same
/// LLVM type as the second component. A complex input has each component
/// converted from the source element type to the destination element type.
static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
                      QualType DestType, SourceLocation Loc) {
  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
         "DestType must have complex evaluation kind.");
  if (Val.isScalar()) {
    // Convert the input element to the element type of the complex.
    auto DestElemTy = DestType->castAs<ComplexType>()->getElementType();
    auto *First =
        CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestElemTy, Loc);
    return CodeGenFunction::ComplexPairTy(
        First, llvm::Constant::getNullValue(First->getType()));
  }
  assert(Val.isComplex() && "Must be a scalar or complex.");
  auto SrcElemTy = SrcType->castAs<ComplexType>()->getElementType();
  auto DestElemTy = DestType->castAs<ComplexType>()->getElementType();
  // Convert the two components in order: first, then second.
  auto *First = CGF.EmitScalarConversion(Val.getComplexVal().first, SrcElemTy,
                                         DestElemTy, Loc);
  auto *Second = CGF.EmitScalarConversion(Val.getComplexVal().second,
                                          SrcElemTy, DestElemTy, Loc);
  return CodeGenFunction::ComplexPairTy(First, Second);
}
|
|
|
|
|
2015-04-23 14:35:10 +08:00
|
|
|
/// Store \p RVal into \p LVal.
///
/// Global-register lvalues are written with
/// EmitStoreThroughGlobalRegLValue. All other lvalues are written with
/// EmitAtomicStore, using sequentially-consistent ordering when \p IsSeqCst
/// is set and monotonic ordering otherwise.
static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
                                  LValue LVal, RValue RVal) {
  if (LVal.isGlobalReg()) {
    CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
    return;
  }
  const auto Ordering =
      IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic;
  CGF.EmitAtomicStore(RVal, LVal, Ordering, LVal.isVolatile(),
                      /*IsInit=*/false);
}
|
|
|
|
|
2016-01-21 20:35:58 +08:00
|
|
|
/// Convert \p RVal (of type \p RValTy) to the type of \p LVal and store it
/// there with a regular, non-atomic store.
///
/// Only scalar and complex destinations are supported; an aggregate
/// destination is treated as unreachable.
void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
                                         QualType RValTy, SourceLocation Loc) {
  switch (getEvaluationKind(LVal.getType())) {
  case TEK_Scalar: {
    auto *ScalarToStore =
        convertToScalarValue(*this, RVal, RValTy, LVal.getType(), Loc);
    EmitStoreThroughLValue(RValue::get(ScalarToStore), LVal);
    return;
  }
  case TEK_Complex: {
    auto ComplexToStore =
        convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc);
    EmitStoreOfComplex(ComplexToStore, LVal, /*isInit=*/false);
    return;
  }
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}
|
|
|
|
|
2015-01-22 14:17:56 +08:00
|
|
|
/// Emit code for '#pragma omp atomic read', i.e. 'v = x;'.
///
/// \p X is loaded (atomically unless it is a global-register lvalue), an
/// implicit flush is emitted for seq_cst, and the loaded value is then stored
/// to \p V with a regular store after conversion to V's type.
static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);

  RValue Loaded;
  if (XLValue.isGlobalReg()) {
    // Global registers are read with an ordinary load.
    Loaded = CGF.EmitLoadOfLValue(XLValue, Loc);
  } else {
    auto Ordering = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic;
    Loaded =
        CGF.EmitAtomicLoad(XLValue, Loc, Ordering, XLValue.isVolatile());
  }

  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);

  QualType LoadedTy = X->getType().getNonReferenceType();
  CGF.emitOMPSimpleStore(VLValue, Loaded, LoadedTy, Loc);
}
|
|
|
|
|
2015-02-27 14:33:30 +08:00
|
|
|
/// Emit code for '#pragma omp atomic write', i.e. 'x = expr;'.
///
/// \param CGF      Function the code is emitted into.
/// \param IsSeqCst True if the construct has a seq_cst clause; selects the
///                 store ordering and triggers the implicit flush.
/// \param X        The 'x' lvalue expression being written.
/// \param E        The 'expr' whose value is stored.
/// \param Loc      Source location used for the flush.
static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                   const Expr *X, const Expr *E,
                                   SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  // Emit the operands in separate statements. Both calls may emit IR, and the
  // evaluation order of function arguments is unspecified in C++, so passing
  // them directly as call arguments would make the order of the emitted
  // instructions depend on the host compiler. 'x' is emitted first, then
  // 'expr', matching the atomic update and capture emitters in this file.
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  emitSimpleAtomicStore(CGF, IsSeqCst, XLValue, ExprRValue);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
|
|
|
|
|
2015-05-01 21:59:53 +08:00
|
|
|
static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
|
|
|
|
RValue Update,
|
|
|
|
BinaryOperatorKind BO,
|
|
|
|
llvm::AtomicOrdering AO,
|
|
|
|
bool IsXLHSInRHSPart) {
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
auto &Context = CGF.CGM.getContext();
|
|
|
|
// Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
|
2015-03-30 13:20:59 +08:00
|
|
|
// expression is simple and atomic is allowed for the given type for the
|
|
|
|
// target platform.
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
if (BO == BO_Comma || !Update.isScalar() ||
|
2015-05-08 19:47:16 +08:00
|
|
|
!Update.getScalarVal()->getType()->isIntegerTy() ||
|
|
|
|
!X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
|
|
|
|
(Update.getScalarVal()->getType() !=
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
X.getAddress().getElementType())) ||
|
|
|
|
!X.getAddress().getElementType()->isIntegerTy() ||
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
!Context.getTargetInfo().hasBuiltinAtomic(
|
|
|
|
Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
|
2015-04-23 14:35:10 +08:00
|
|
|
return std::make_pair(false, RValue::get(nullptr));
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
|
|
|
|
llvm::AtomicRMWInst::BinOp RMWOp;
|
|
|
|
switch (BO) {
|
|
|
|
case BO_Add:
|
|
|
|
RMWOp = llvm::AtomicRMWInst::Add;
|
|
|
|
break;
|
|
|
|
case BO_Sub:
|
|
|
|
if (!IsXLHSInRHSPart)
|
2015-04-23 14:35:10 +08:00
|
|
|
return std::make_pair(false, RValue::get(nullptr));
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
RMWOp = llvm::AtomicRMWInst::Sub;
|
|
|
|
break;
|
|
|
|
case BO_And:
|
|
|
|
RMWOp = llvm::AtomicRMWInst::And;
|
|
|
|
break;
|
|
|
|
case BO_Or:
|
|
|
|
RMWOp = llvm::AtomicRMWInst::Or;
|
|
|
|
break;
|
|
|
|
case BO_Xor:
|
|
|
|
RMWOp = llvm::AtomicRMWInst::Xor;
|
|
|
|
break;
|
|
|
|
case BO_LT:
|
|
|
|
RMWOp = X.getType()->hasSignedIntegerRepresentation()
|
|
|
|
? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
|
|
|
|
: llvm::AtomicRMWInst::Max)
|
|
|
|
: (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
|
|
|
|
: llvm::AtomicRMWInst::UMax);
|
|
|
|
break;
|
|
|
|
case BO_GT:
|
|
|
|
RMWOp = X.getType()->hasSignedIntegerRepresentation()
|
|
|
|
? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
|
|
|
|
: llvm::AtomicRMWInst::Min)
|
|
|
|
: (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
|
|
|
|
: llvm::AtomicRMWInst::UMin);
|
|
|
|
break;
|
2015-04-23 14:35:10 +08:00
|
|
|
case BO_Assign:
|
|
|
|
RMWOp = llvm::AtomicRMWInst::Xchg;
|
|
|
|
break;
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
case BO_Mul:
|
|
|
|
case BO_Div:
|
|
|
|
case BO_Rem:
|
|
|
|
case BO_Shl:
|
|
|
|
case BO_Shr:
|
|
|
|
case BO_LAnd:
|
|
|
|
case BO_LOr:
|
2015-04-23 14:35:10 +08:00
|
|
|
return std::make_pair(false, RValue::get(nullptr));
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
case BO_PtrMemD:
|
|
|
|
case BO_PtrMemI:
|
|
|
|
case BO_LE:
|
|
|
|
case BO_GE:
|
|
|
|
case BO_EQ:
|
|
|
|
case BO_NE:
|
|
|
|
case BO_AddAssign:
|
|
|
|
case BO_SubAssign:
|
|
|
|
case BO_AndAssign:
|
|
|
|
case BO_OrAssign:
|
|
|
|
case BO_XorAssign:
|
|
|
|
case BO_MulAssign:
|
|
|
|
case BO_DivAssign:
|
|
|
|
case BO_RemAssign:
|
|
|
|
case BO_ShlAssign:
|
|
|
|
case BO_ShrAssign:
|
|
|
|
case BO_Comma:
|
|
|
|
llvm_unreachable("Unsupported atomic update operation");
|
|
|
|
}
|
|
|
|
auto *UpdateVal = Update.getScalarVal();
|
|
|
|
if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
|
|
|
|
UpdateVal = CGF.Builder.CreateIntCast(
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
IC, X.getAddress().getElementType(),
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
X.getType()->hasSignedIntegerRepresentation());
|
|
|
|
}
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
|
2015-04-23 14:35:10 +08:00
|
|
|
return std::make_pair(true, RValue::get(Res));
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
}
|
|
|
|
|
2015-04-23 14:35:10 +08:00
|
|
|
std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
|
|
|
|
llvm::AtomicOrdering AO, SourceLocation Loc,
|
|
|
|
const llvm::function_ref<RValue(RValue)> &CommonGen) {
|
|
|
|
// Update expressions are allowed to have the following forms:
|
|
|
|
// x binop= expr; -> xrval + expr;
|
|
|
|
// x++, ++x -> xrval + 1;
|
|
|
|
// x--, --x -> xrval - 1;
|
|
|
|
// x = x binop expr; -> xrval binop expr
|
|
|
|
// x = expr Op x; - > expr binop xrval;
|
2015-04-23 14:35:10 +08:00
|
|
|
auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
|
|
|
|
if (!Res.first) {
|
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
2015-04-10 18:43:45 +08:00
|
|
|
if (X.isGlobalReg()) {
|
|
|
|
// Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
|
|
|
|
// 'xrval'.
|
|
|
|
EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
|
|
|
|
} else {
|
|
|
|
// Perform compare-and-swap procedure.
|
|
|
|
EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
|
2015-03-30 13:20:59 +08:00
|
|
|
}
|
|
|
|
}
|
2015-04-23 14:35:10 +08:00
|
|
|
return Res;
|
2015-03-30 13:20:59 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Emit code for '#pragma omp atomic update'.
///
/// \param CGF             Function the code is emitted into.
/// \param IsSeqCst        True if the construct has a seq_cst clause.
/// \param X               The 'x' lvalue expression being updated.
/// \param E               The 'expr' operand of the update.
/// \param UE              Update expression: a binary operator whose operands
///                        are opaque values standing for 'x' and 'expr'.
/// \param IsXLHSInRHSPart True if 'x' is the left operand of \p UE.
/// \param Loc             Source location used for loads and the flush.
static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                    const Expr *X, const Expr *E,
                                    const Expr *UE, bool IsXLHSInRHSPart,
                                    SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  auto *UpdateOp = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval + expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr
  // x = expr Op x; -> expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic;

  // Work out which operand of the update expression stands for 'x' and which
  // one for 'expr'.
  auto *LHSOpaque = cast<OpaqueValueExpr>(UpdateOp->getLHS()->IgnoreImpCasts());
  auto *RHSOpaque = cast<OpaqueValueExpr>(UpdateOp->getRHS()->IgnoreImpCasts());
  auto *XRValExpr = IsXLHSInRHSPart ? LHSOpaque : RHSOpaque;
  auto *ERValExpr = IsXLHSInRHSPart ? RHSOpaque : LHSOpaque;

  // Computes the new value of 'x' from its current value by binding both
  // opaque operands and evaluating the update expression.
  auto ComputeNewX =
      [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
    CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
    CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
    return CGF.EmitAnyExpr(UE);
  };
  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(XLValue, ExprRValue,
                                          UpdateOp->getOpcode(),
                                          IsXLHSInRHSPart, AO, Loc,
                                          ComputeNewX);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
|
|
|
|
|
|
|
|
/// Convert \p Value from \p SourceType to \p ResType and return it as an
/// RValue.
///
/// \p ResType must have scalar or complex evaluation kind; any other kind is
/// treated as unreachable.
static RValue convertToType(CodeGenFunction &CGF, RValue Value,
                            QualType SourceType, QualType ResType,
                            SourceLocation Loc) {
  auto Kind = CGF.getEvaluationKind(ResType);
  if (Kind == TEK_Scalar)
    return RValue::get(
        convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
  if (Kind == TEK_Complex) {
    auto Parts = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
    return RValue::getComplex(Parts.first, Parts.second);
  }
  // TEK_Aggregate is not a valid result kind here.
  llvm_unreachable("Must be a scalar or complex.");
}
|
|
|
|
|
|
|
|
/// Emits the 'capture' form of '#pragma omp atomic': 'x' is updated (or
/// overwritten) atomically and either its previous or its resulting value is
/// stored to 'v'.
///
/// \param CGF             Function the code is emitted into.
/// \param IsSeqCst        Selects sequentially consistent ordering (monotonic
///                        otherwise) and requests the trailing flush.
/// \param IsPostfixUpdate True if 'v' must receive the value 'x' had before
///                        the update.
/// \param V               Lvalue that receives the captured value.
/// \param X               Lvalue that is updated atomically.
/// \param E               The 'expr' operand of the update.
/// \param UE              Update expression (a binary operator over opaque
///                        values standing for 'x' and 'expr'), or null when
///                        'x' is simply overwritten with 'expr'.
/// \param IsXLHSInRHSPart True if 'x' is the left operand of \p UE.
/// \param Loc             Source location used for the emitted operations.
static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic;
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    // x binop= expr; -> xrval + expr;
    // x++, ++x -> xrval + 1;
    // x--, --x -> xrval - 1;
    // x = x binop expr; -> xrval binop expr
    // x = expr Op x; - > expr binop xrval;
    auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    // Generic update callback: binds the opaque operands, evaluates the update
    // expression and records the value that must end up in 'v'.
    // Note: IsSeqCst is intentionally not captured, the callback never reads
    // it.
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsPostfixUpdate](RValue XRValue) -> RValue {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide new value, so evaluate it using old
        // value of 'x'.
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    ExprRValue =
        convertToType(CGF, ExprRValue, E->getType(), NewVValType, Loc);
    // Generic exchange callback: remembers the previous value of 'x' for 'v'
    // and yields the converted 'expr' as the new value.
    auto &&Gen = [&CGF, &NewVVal, ExprRValue](RValue XRValue) -> RValue {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform atomicrmw xchg, otherwise simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit post-update store to 'v' of old/new 'x' value.
  CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
|
|
|
|
|
2015-01-22 14:17:56 +08:00
|
|
|
/// Dispatches code generation for '#pragma omp atomic' according to the
/// clause kind \p Kind.
///
/// 'read', 'write' and 'capture' go to their dedicated emitters. 'update' and
/// OMPC_unknown share the update emitter. Every other clause kind is rejected
/// with llvm_unreachable.
static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              bool IsSeqCst, bool IsPostfixUpdate,
                              const Expr *X, const Expr *V, const Expr *E,
                              const Expr *UE, bool IsXLHSInRHSPart,
                              SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
    return;
  case OMPC_write:
    EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
    return;
  // OMPC_unknown is handled exactly like an explicit 'update' clause.
  case OMPC_unknown:
  case OMPC_update:
    EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
    return;
  case OMPC_capture:
    EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
                             IsXLHSInRHSPart, Loc);
    return;
  // All remaining clause kinds are listed explicitly (no 'default') so that
  // the switch stays fully covered.
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_safelen:
  case OMPC_simdlen:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_depend:
  case OMPC_mergeable:
  case OMPC_device:
  case OMPC_threads:
  case OMPC_simd:
  case OMPC_map:
  case OMPC_num_teams:
  case OMPC_thread_limit:
  case OMPC_priority:
  case OMPC_grainsize:
  case OMPC_nogroup:
  case OMPC_num_tasks:
  case OMPC_hint:
  case OMPC_dist_schedule:
  case OMPC_defaultmap:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}
|
|
|
|
|
|
|
|
void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
|
2015-08-30 23:12:28 +08:00
|
|
|
bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
|
2015-01-22 14:17:56 +08:00
|
|
|
OpenMPClauseKind Kind = OMPC_unknown;
|
|
|
|
for (auto *C : S.clauses()) {
|
|
|
|
// Find first clause (skip seq_cst clause, if it is first).
|
|
|
|
if (C->getClauseKind() != OMPC_seq_cst) {
|
|
|
|
Kind = C->getClauseKind();
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2015-03-11 12:48:56 +08:00
|
|
|
|
|
|
|
const auto *CS =
|
|
|
|
S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
|
2015-04-23 14:35:10 +08:00
|
|
|
if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
|
2015-03-11 12:48:56 +08:00
|
|
|
enterFullExpression(EWC);
|
2015-04-23 14:35:10 +08:00
|
|
|
}
|
|
|
|
// Processing for statements under 'atomic capture'.
|
|
|
|
if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
|
|
|
|
for (const auto *C : Compound->body()) {
|
|
|
|
if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
|
|
|
|
enterFullExpression(EWC);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2015-03-11 12:48:56 +08:00
|
|
|
|
2015-04-10 12:50:10 +08:00
|
|
|
LexicalScope Scope(*this, S.getSourceRange());
|
2015-12-14 17:26:19 +08:00
|
|
|
auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF) {
|
|
|
|
CGF.EmitStopPoint(CS);
|
2015-04-23 14:35:10 +08:00
|
|
|
EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
|
|
|
|
S.getV(), S.getExpr(), S.getUpdateExpr(),
|
|
|
|
S.isXLHSInRHSPart(), S.getLocStart());
|
2015-04-10 12:50:10 +08:00
|
|
|
};
|
2015-07-03 17:56:58 +08:00
|
|
|
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
|
2014-07-22 18:10:35 +08:00
|
|
|
}
|
|
|
|
|
2015-10-03 00:14:20 +08:00
|
|
|
void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
|
|
|
|
LexicalScope Scope(*this, S.getSourceRange());
|
|
|
|
const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());
|
|
|
|
|
|
|
|
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
|
2015-12-03 01:44:43 +08:00
|
|
|
GenerateOpenMPCapturedVars(CS, CapturedVars);
|
2015-10-03 00:14:20 +08:00
|
|
|
|
2016-01-06 21:42:12 +08:00
|
|
|
llvm::Function *Fn = nullptr;
|
|
|
|
llvm::Constant *FnID = nullptr;
|
2015-10-03 00:14:20 +08:00
|
|
|
|
|
|
|
// Check if we have any if clause associated with the directive.
|
|
|
|
const Expr *IfCond = nullptr;
|
|
|
|
|
|
|
|
if (auto *C = S.getSingleClause<OMPIfClause>()) {
|
|
|
|
IfCond = C->getCondition();
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check if we have any device clause associated with the directive.
|
|
|
|
const Expr *Device = nullptr;
|
|
|
|
if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
|
|
|
|
Device = C->getDevice();
|
|
|
|
}
|
|
|
|
|
2016-01-06 21:42:12 +08:00
|
|
|
// Check if we have an if clause whose conditional always evaluates to false
|
|
|
|
// or if we do not have any targets specified. If so the target region is not
|
|
|
|
// an offload entry point.
|
|
|
|
bool IsOffloadEntry = true;
|
|
|
|
if (IfCond) {
|
|
|
|
bool Val;
|
|
|
|
if (ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
|
|
|
|
IsOffloadEntry = false;
|
|
|
|
}
|
|
|
|
if (CGM.getLangOpts().OMPTargetTriples.empty())
|
|
|
|
IsOffloadEntry = false;
|
|
|
|
|
|
|
|
assert(CurFuncDecl && "No parent declaration for target region!");
|
|
|
|
StringRef ParentName;
|
|
|
|
// In case we have Ctors/Dtors we use the complete type variant to produce
|
|
|
|
// the mangling of the device outlined kernel.
|
|
|
|
if (auto *D = dyn_cast<CXXConstructorDecl>(CurFuncDecl))
|
|
|
|
ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
|
|
|
|
else if (auto *D = dyn_cast<CXXDestructorDecl>(CurFuncDecl))
|
|
|
|
ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
|
|
|
|
else
|
|
|
|
ParentName =
|
|
|
|
CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CurFuncDecl)));
|
|
|
|
|
|
|
|
CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
|
|
|
|
IsOffloadEntry);
|
|
|
|
|
|
|
|
CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, FnID, IfCond, Device,
|
2015-10-03 00:14:20 +08:00
|
|
|
CapturedVars);
|
2014-09-19 16:19:49 +08:00
|
|
|
}
|
|
|
|
|
2014-10-09 12:18:56 +08:00
|
|
|
/// Placeholder for '#pragma omp teams': code generation is not implemented,
/// so reaching this function is reported through llvm_unreachable.
void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) {
  llvm_unreachable("CodeGen for 'omp teams' is not supported yet.");
}
|
2015-07-01 14:57:41 +08:00
|
|
|
|
|
|
|
void CodeGenFunction::EmitOMPCancellationPointDirective(
|
|
|
|
const OMPCancellationPointDirective &S) {
|
2015-07-02 12:17:07 +08:00
|
|
|
CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
|
|
|
|
S.getCancelRegion());
|
2015-07-01 14:57:41 +08:00
|
|
|
}
|
|
|
|
|
2015-07-02 19:25:17 +08:00
|
|
|
void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
|
2015-09-18 16:07:34 +08:00
|
|
|
const Expr *IfCond = nullptr;
|
|
|
|
for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
|
|
|
|
if (C->getNameModifier() == OMPD_unknown ||
|
|
|
|
C->getNameModifier() == OMPD_cancel) {
|
|
|
|
IfCond = C->getCondition();
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
|
2015-07-06 13:50:32 +08:00
|
|
|
S.getCancelRegion());
|
2015-07-02 19:25:17 +08:00
|
|
|
}
|
|
|
|
|
2015-07-03 17:56:58 +08:00
|
|
|
/// Returns the jump destination used when a region of directive kind \p Kind
/// is cancelled.
///
/// 'parallel' and 'task' use ReturnBlock. The loop and sections kinds checked
/// by the assertion use the break block on top of BreakContinueStack.
CodeGenFunction::JumpDest
CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
  const bool UsesReturnBlock = Kind == OMPD_parallel || Kind == OMPD_task;
  if (UsesReturnBlock)
    return ReturnBlock;
  assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
         Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for);
  return BreakContinueStack.back().BreakBlock;
}
|
2015-07-21 21:44:28 +08:00
|
|
|
|
|
|
|
// Generate the instructions for '#pragma omp target data' directive.
|
|
|
|
void CodeGenFunction::EmitOMPTargetDataDirective(
|
|
|
|
const OMPTargetDataDirective &S) {
|
|
|
|
// emit the code inside the construct for now
|
|
|
|
auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
|
2015-08-11 12:52:01 +08:00
|
|
|
CGM.getOpenMPRuntime().emitInlinedDirective(
|
|
|
|
*this, OMPD_target_data,
|
|
|
|
[&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); });
|
2015-07-21 21:44:28 +08:00
|
|
|
}
|
2015-12-01 12:18:41 +08:00
|
|
|
|
2016-01-20 03:15:56 +08:00
|
|
|
void CodeGenFunction::EmitOMPTargetEnterDataDirective(
|
|
|
|
const OMPTargetEnterDataDirective &S) {
|
|
|
|
// TODO: codegen for target enter data.
|
|
|
|
}
|
|
|
|
|
2016-01-20 04:04:50 +08:00
|
|
|
void CodeGenFunction::EmitOMPTargetExitDataDirective(
|
|
|
|
const OMPTargetExitDataDirective &S) {
|
|
|
|
// TODO: codegen for target exit data.
|
|
|
|
}
|
|
|
|
|
2016-01-27 02:48:41 +08:00
|
|
|
void CodeGenFunction::EmitOMPTargetParallelDirective(
|
|
|
|
const OMPTargetParallelDirective &S) {
|
|
|
|
// TODO: codegen for target parallel.
|
|
|
|
}
|
|
|
|
|
2015-12-01 12:18:41 +08:00
|
|
|
void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
|
|
|
|
// emit the code inside the construct for now
|
|
|
|
auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
|
|
|
|
CGM.getOpenMPRuntime().emitInlinedDirective(
|
|
|
|
*this, OMPD_taskloop,
|
|
|
|
[&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); });
|
|
|
|
}
|
|
|
|
|
2015-12-03 17:40:15 +08:00
|
|
|
void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
|
|
|
|
const OMPTaskLoopSimdDirective &S) {
|
|
|
|
// emit the code inside the construct for now
|
|
|
|
auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
|
|
|
|
CGM.getOpenMPRuntime().emitInlinedDirective(
|
|
|
|
*this, OMPD_taskloop_simd,
|
|
|
|
[&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); });
|
|
|
|
}
|
|
|
|
|