Revert "[OPENMP]Fix PR37671: Privatize local(private) variables in untied tasks."

This reverts commit ec9563c54e to
investigate the compiler crash revealed by the buildbots.
This commit is contained in:
Alexey Bataev 2020-08-12 09:49:59 -04:00
parent e891b6a75d
commit 3651658bdd
4 changed files with 77 additions and 225 deletions

View File

@ -180,7 +180,7 @@ public:
UntiedCodeGen(CGF); UntiedCodeGen(CGF);
CodeGenFunction::JumpDest CurPoint = CodeGenFunction::JumpDest CurPoint =
CGF.getJumpDestInCurrentScope(".untied.next."); CGF.getJumpDestInCurrentScope(".untied.next.");
CGF.EmitBranch(CGF.ReturnBlock.getBlock()); CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
CGF.Builder.GetInsertBlock()); CGF.Builder.GetInsertBlock());
@ -3370,7 +3370,6 @@ struct PrivateHelpersTy {
const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
: OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
PrivateElemInit(PrivateElemInit) {} PrivateElemInit(PrivateElemInit) {}
PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
const Expr *OriginalRef = nullptr; const Expr *OriginalRef = nullptr;
const VarDecl *Original = nullptr; const VarDecl *Original = nullptr;
const VarDecl *PrivateCopy = nullptr; const VarDecl *PrivateCopy = nullptr;
@ -3391,10 +3390,6 @@ createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
for (const auto &Pair : Privates) { for (const auto &Pair : Privates) {
const VarDecl *VD = Pair.second.Original; const VarDecl *VD = Pair.second.Original;
QualType Type = VD->getType().getNonReferenceType(); QualType Type = VD->getType().getNonReferenceType();
// If the private variable is a local variable with lvalue ref type,
// allocate the pointer instead of the pointee type.
if (!Pair.second.OriginalRef && VD->getType()->isLValueReferenceType())
Type = C.getPointerType(Type);
FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
if (VD->hasAttrs()) { if (VD->hasAttrs()) {
for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
@ -3648,7 +3643,10 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
/// \endcode /// \endcode
static llvm::Value * static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
const OMPTaskDataTy &Data, QualType PrivatesQTy, ArrayRef<const Expr *> PrivateVars,
ArrayRef<const Expr *> FirstprivateVars,
ArrayRef<const Expr *> LastprivateVars,
QualType PrivatesQTy,
ArrayRef<PrivateDataTy> Privates) { ArrayRef<PrivateDataTy> Privates) {
ASTContext &C = CGM.getContext(); ASTContext &C = CGM.getContext();
FunctionArgList Args; FunctionArgList Args;
@ -3657,9 +3655,9 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
C.getPointerType(PrivatesQTy).withConst().withRestrict(), C.getPointerType(PrivatesQTy).withConst().withRestrict(),
ImplicitParamDecl::Other); ImplicitParamDecl::Other);
Args.push_back(&TaskPrivatesArg); Args.push_back(&TaskPrivatesArg);
llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos; llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
unsigned Counter = 1; unsigned Counter = 1;
for (const Expr *E : Data.PrivateVars) { for (const Expr *E : PrivateVars) {
Args.push_back(ImplicitParamDecl::Create( Args.push_back(ImplicitParamDecl::Create(
C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
C.getPointerType(C.getPointerType(E->getType())) C.getPointerType(C.getPointerType(E->getType()))
@ -3670,7 +3668,7 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
PrivateVarsPos[VD] = Counter; PrivateVarsPos[VD] = Counter;
++Counter; ++Counter;
} }
for (const Expr *E : Data.FirstprivateVars) { for (const Expr *E : FirstprivateVars) {
Args.push_back(ImplicitParamDecl::Create( Args.push_back(ImplicitParamDecl::Create(
C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
C.getPointerType(C.getPointerType(E->getType())) C.getPointerType(C.getPointerType(E->getType()))
@ -3681,7 +3679,7 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
PrivateVarsPos[VD] = Counter; PrivateVarsPos[VD] = Counter;
++Counter; ++Counter;
} }
for (const Expr *E : Data.LastprivateVars) { for (const Expr *E : LastprivateVars) {
Args.push_back(ImplicitParamDecl::Create( Args.push_back(ImplicitParamDecl::Create(
C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
C.getPointerType(C.getPointerType(E->getType())) C.getPointerType(C.getPointerType(E->getType()))
@ -3692,17 +3690,6 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
PrivateVarsPos[VD] = Counter; PrivateVarsPos[VD] = Counter;
++Counter; ++Counter;
} }
for (const VarDecl *VD : Data.PrivateLocals) {
QualType Ty = VD->getType().getNonReferenceType();
if (VD->getType()->isLValueReferenceType())
Ty = C.getPointerType(Ty);
Args.push_back(ImplicitParamDecl::Create(
C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
ImplicitParamDecl::Other));
PrivateVarsPos[VD] = Counter;
++Counter;
}
const auto &TaskPrivatesMapFnInfo = const auto &TaskPrivatesMapFnInfo =
CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
llvm::FunctionType *TaskPrivatesMapTy = llvm::FunctionType *TaskPrivatesMapTy =
@ -3958,16 +3945,16 @@ emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
/// Checks if destructor function is required to be generated. /// Checks if destructor function is required to be generated.
/// \return true if cleanups are required, false otherwise. /// \return true if cleanups are required, false otherwise.
static bool static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
ArrayRef<PrivateDataTy> Privates) { bool NeedsCleanup = false;
for (const PrivateDataTy &P : Privates) { auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
if (!P.second.OriginalRef) const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
continue; for (const FieldDecl *FD : PrivateRD->fields()) {
QualType Ty = P.second.Original->getType().getNonReferenceType(); NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
if (Ty.isDestructedType()) if (NeedsCleanup)
return true; break;
} }
return false; return NeedsCleanup;
} }
namespace { namespace {
@ -4137,12 +4124,9 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
/*PrivateElemInit=*/nullptr)); /*PrivateElemInit=*/nullptr));
++I; ++I;
} }
for (const VarDecl *VD : Data.PrivateLocals) llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD)); return L.first > R.first;
llvm::stable_sort(Privates, });
[](const PrivateDataTy &L, const PrivateDataTy &R) {
return L.first > R.first;
});
QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
// Build type kmp_routine_entry_t (if not built yet). // Build type kmp_routine_entry_t (if not built yet).
emitKmpRoutineEntryT(KmpInt32Ty); emitKmpRoutineEntryT(KmpInt32Ty);
@ -4184,8 +4168,9 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
std::next(TaskFunction->arg_begin(), 3)->getType(); std::next(TaskFunction->arg_begin(), 3)->getType();
if (!Privates.empty()) { if (!Privates.empty()) {
auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
TaskPrivatesMap = TaskPrivatesMap = emitTaskPrivateMappingFunction(
emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates); CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
FI->getType(), Privates);
TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
TaskPrivatesMap, TaskPrivatesMapTy); TaskPrivatesMap, TaskPrivatesMapTy);
} else { } else {
@ -4215,8 +4200,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
unsigned Flags = Data.Tied ? TiedFlag : 0; unsigned Flags = Data.Tied ? TiedFlag : 0;
bool NeedsCleanup = false; bool NeedsCleanup = false;
if (!Privates.empty()) { if (!Privates.empty()) {
NeedsCleanup = NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
if (NeedsCleanup) if (NeedsCleanup)
Flags = Flags | DestructorsFlag; Flags = Flags | DestructorsFlag;
} }
@ -11239,64 +11223,56 @@ Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
if (!VD) if (!VD)
return Address::invalid(); return Address::invalid();
const VarDecl *CVD = VD->getCanonicalDecl(); const VarDecl *CVD = VD->getCanonicalDecl();
if (CVD->hasAttr<OMPAllocateDeclAttr>()) { if (!CVD->hasAttr<OMPAllocateDeclAttr>())
const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); return Address::invalid();
// Use the default allocation. const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || // Use the default allocation.
AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
!AA->getAllocator()) AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
return Address::invalid(); !AA->getAllocator())
llvm::Value *Size; return Address::invalid();
CharUnits Align = CGM.getContext().getDeclAlign(CVD); llvm::Value *Size;
if (CVD->getType()->isVariablyModifiedType()) { CharUnits Align = CGM.getContext().getDeclAlign(CVD);
Size = CGF.getTypeSize(CVD->getType()); if (CVD->getType()->isVariablyModifiedType()) {
// Align the size: ((size + align - 1) / align) * align Size = CGF.getTypeSize(CVD->getType());
Size = CGF.Builder.CreateNUWAdd( // Align the size: ((size + align - 1) / align) * align
Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); Size = CGF.Builder.CreateNUWAdd(
Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
} else { Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); } else {
Size = CGM.getSize(Sz.alignTo(Align)); CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
} Size = CGM.getSize(Sz.alignTo(Align));
llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
assert(AA->getAllocator() &&
"Expected allocator expression for non-default allocator.");
llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
// According to the standard, the original allocator type is a enum
// (integer). Convert to pointer type, if required.
if (Allocator->getType()->isIntegerTy())
Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
else if (Allocator->getType()->isPointerTy())
Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
Allocator, CGM.VoidPtrTy);
llvm::Value *Args[] = {ThreadID, Size, Allocator};
llvm::Value *Addr =
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_alloc),
Args, getName({CVD->getName(), ".void.addr"}));
llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
Allocator};
llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_free);
CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
llvm::makeArrayRef(FiniArgs));
Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
Addr,
CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
getName({CVD->getName(), ".addr"}));
return Address(Addr, Align);
} }
if (UntiedLocalVarsStack.empty()) llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
return Address::invalid(); assert(AA->getAllocator() &&
const UntiedLocalVarsAddressesMap &UntiedData = UntiedLocalVarsStack.back(); "Expected allocator expression for non-default allocator.");
auto It = UntiedData.find(VD); llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
if (It == UntiedData.end()) // According to the standard, the original allocator type is a enum (integer).
return Address::invalid(); // Convert to pointer type, if required.
if (Allocator->getType()->isIntegerTy())
Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
else if (Allocator->getType()->isPointerTy())
Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
CGM.VoidPtrTy);
llvm::Value *Args[] = {ThreadID, Size, Allocator};
return It->second; llvm::Value *Addr =
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_alloc),
Args, getName({CVD->getName(), ".void.addr"}));
llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
Allocator};
llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_free);
CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
llvm::makeArrayRef(FiniArgs));
Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
Addr,
CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
getName({CVD->getName(), ".addr"}));
return Address(Addr, Align);
} }
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
@ -11331,21 +11307,6 @@ CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
} }
CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
CodeGenModule &CGM,
const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, Address> &LocalVars)
: CGM(CGM), NeedToPush(!LocalVars.empty()) {
if (!NeedToPush)
return;
CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
}
CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
if (!NeedToPush)
return;
CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
}
bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");

View File

@ -105,7 +105,6 @@ struct OMPTaskDataTy final {
SmallVector<const Expr *, 4> ReductionOrigs; SmallVector<const Expr *, 4> ReductionOrigs;
SmallVector<const Expr *, 4> ReductionCopies; SmallVector<const Expr *, 4> ReductionCopies;
SmallVector<const Expr *, 4> ReductionOps; SmallVector<const Expr *, 4> ReductionOps;
SmallVector<CanonicalDeclPtr<const VarDecl>, 4> PrivateLocals;
struct DependData { struct DependData {
OpenMPDependClauseKind DepKind = OMPC_DEPEND_unknown; OpenMPDependClauseKind DepKind = OMPC_DEPEND_unknown;
const Expr *IteratorExpr = nullptr; const Expr *IteratorExpr = nullptr;
@ -246,19 +245,6 @@ public:
~NontemporalDeclsRAII(); ~NontemporalDeclsRAII();
}; };
/// Manages list of nontemporal decls for the specified directive.
class UntiedTaskLocalDeclsRAII {
CodeGenModule &CGM;
const bool NeedToPush;
public:
UntiedTaskLocalDeclsRAII(
CodeGenModule &CGM,
const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, Address>
&LocalVars);
~UntiedTaskLocalDeclsRAII();
};
/// Maps the expression for the lastprivate variable to the global copy used /// Maps the expression for the lastprivate variable to the global copy used
/// to store new value because original variables are not mapped in inner /// to store new value because original variables are not mapped in inner
/// parallel regions. Only private copies are captured but we need also to /// parallel regions. Only private copies are captured but we need also to
@ -719,10 +705,6 @@ private:
/// The set is the union of all current stack elements. /// The set is the union of all current stack elements.
llvm::SmallVector<NontemporalDeclsSet, 4> NontemporalDeclsStack; llvm::SmallVector<NontemporalDeclsSet, 4> NontemporalDeclsStack;
using UntiedLocalVarsAddressesMap =
llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, Address>;
llvm::SmallVector<UntiedLocalVarsAddressesMap, 4> UntiedLocalVarsStack;
/// Stack for list of addresses of declarations in current context marked as /// Stack for list of addresses of declarations in current context marked as
/// lastprivate conditional. The set is the union of all current stack /// lastprivate conditional. The set is the union of all current stack
/// elements. /// elements.

View File

@ -21,7 +21,6 @@
#include "clang/AST/OpenMPClause.h" #include "clang/AST/OpenMPClause.h"
#include "clang/AST/Stmt.h" #include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h" #include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/OpenMPKinds.h" #include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/PrettyStackTrace.h" #include "clang/Basic/PrettyStackTrace.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/Frontend/OpenMP/OMPConstants.h"
@ -3785,42 +3784,6 @@ void CodeGenFunction::EmitOMPParallelSectionsDirective(
checkForLastprivateConditionalUpdate(*this, S); checkForLastprivateConditionalUpdate(*this, S);
} }
namespace {
/// Get the list of variables declared in the context of the untied tasks.
class CheckVarsEscapingUntiedTaskDeclContext final
: public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> {
llvm::SmallVector<const VarDecl *, 4> PrivateDecls;
public:
explicit CheckVarsEscapingUntiedTaskDeclContext() = default;
virtual ~CheckVarsEscapingUntiedTaskDeclContext() = default;
void VisitDeclStmt(const DeclStmt *S) {
if (!S)
return;
// Need to privatize only local vars, static locals can be processed as is.
for (const Decl *D : S->decls()) {
if (const auto *VD = dyn_cast_or_null<VarDecl>(D))
if (VD->hasLocalStorage())
PrivateDecls.push_back(VD);
}
}
void VisitOMPExecutableDirective(const OMPExecutableDirective *) { return; }
void VisitCapturedStmt(const CapturedStmt *) { return; }
void VisitLambdaExpr(const LambdaExpr *) { return; }
void VisitBlockExpr(const BlockExpr *) { return; }
void VisitStmt(const Stmt *S) {
if (!S)
return;
for (const Stmt *Child : S->children())
if (Child)
Visit(Child);
}
/// Swaps list of vars with the provided one.
ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; }
};
} // anonymous namespace
void CodeGenFunction::EmitOMPTaskBasedDirective( void CodeGenFunction::EmitOMPTaskBasedDirective(
const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion, const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen, const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
@ -3921,22 +3884,14 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier()); Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
DD.DepExprs.append(C->varlist_begin(), C->varlist_end()); DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
} }
// Get list of local vars for untied tasks.
if (!Data.Tied) {
CheckVarsEscapingUntiedTaskDeclContext Checker;
Checker.Visit(S.getInnermostCapturedStmt()->getCapturedStmt());
Data.PrivateLocals.append(Checker.getPrivateDecls().begin(),
Checker.getPrivateDecls().end());
}
auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs, auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
CapturedRegion](CodeGenFunction &CGF, CapturedRegion](CodeGenFunction &CGF,
PrePostActionTy &Action) { PrePostActionTy &Action) {
llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, Address> UntiedLocalVars;
// Set proper addresses for generated private copies. // Set proper addresses for generated private copies.
OMPPrivateScope Scope(CGF); OMPPrivateScope Scope(CGF);
llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs; llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs;
if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
!Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) { !Data.LastprivateVars.empty()) {
llvm::FunctionType *CopyFnTy = llvm::FunctionType::get( llvm::FunctionType *CopyFnTy = llvm::FunctionType::get(
CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true); CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true);
enum { PrivatesParam = 2, CopyFnParam = 3 }; enum { PrivatesParam = 2, CopyFnParam = 3 };
@ -3972,15 +3927,6 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
PrivatePtrs.emplace_back(VD, PrivatePtr); PrivatePtrs.emplace_back(VD, PrivatePtr);
CallArgs.push_back(PrivatePtr.getPointer()); CallArgs.push_back(PrivatePtr.getPointer());
} }
for (const VarDecl *VD : Data.PrivateLocals) {
QualType Ty = VD->getType().getNonReferenceType();
if (VD->getType()->isLValueReferenceType())
Ty = CGF.getContext().getPointerType(Ty);
Address PrivatePtr = CGF.CreateMemTemp(
CGF.getContext().getPointerType(Ty), ".local.ptr.addr");
UntiedLocalVars.try_emplace(VD, PrivatePtr);
CallArgs.push_back(PrivatePtr.getPointer());
}
CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall( CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs); CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
for (const auto &Pair : LastprivateDstsOrigs) { for (const auto &Pair : LastprivateDstsOrigs) {
@ -3999,13 +3945,6 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
CGF.getContext().getDeclAlign(Pair.first)); CGF.getContext().getDeclAlign(Pair.first));
Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
} }
// Adjust mapping for internal locals by mapping actual memory instead of
// a pointer to this memory.
for (auto &Pair : UntiedLocalVars) {
Address Replacement(CGF.Builder.CreateLoad(Pair.second),
CGF.getContext().getDeclAlign(Pair.first));
Pair.getSecond() = Replacement;
}
} }
if (Data.Reductions) { if (Data.Reductions) {
OMPPrivateScope FirstprivateScope(CGF); OMPPrivateScope FirstprivateScope(CGF);
@ -4100,8 +4039,6 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
} }
(void)InRedScope.Privatize(); (void)InRedScope.Privatize();
CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF.CGM,
UntiedLocalVars);
Action.Enter(CGF); Action.Enter(CGF);
BodyGen(CGF); BodyGen(CGF);
}; };

View File

@ -1,4 +1,4 @@
// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix UNTIEDRT // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
// //
@ -258,7 +258,7 @@ int main() {
a = 4; a = 4;
c = 5; c = 5;
} }
// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 0, i64 48, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY6:@.+]] to i32 (i32, i8*)*)) // CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 0, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY6:@.+]] to i32 (i32, i8*)*))
// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i8* [[ORIG_TASK_PTR]]) // CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i8* [[ORIG_TASK_PTR]])
#pragma omp task untied #pragma omp task untied
{ {
@ -295,54 +295,26 @@ int main() {
// CHECK: store i32 4, i32* [[A_PTR]] // CHECK: store i32 4, i32* [[A_PTR]]
// CHECK: define internal i32 [[TASK_ENTRY6]](i32 %0, [[KMP_TASK_T]]{{.*}}* noalias %1) // CHECK: define internal i32 [[TASK_ENTRY6]](i32 %0, [[KMP_TASK_T]]{{.*}}* noalias %1)
// UNTIEDRT: [[S1_ADDR_PTR:%.+]] = alloca %struct.S*, // CHECK: switch i32 %{{.+}}, label
// UNTIEDRT: call void (i8*, ...) %{{.+}}(i8* %{{.+}}, %struct.S** [[S1_ADDR_PTR]])
// UNTIEDRT: [[S1_ADDR:%.+]] = load %struct.S*, %struct.S** [[S1_ADDR_PTR]],
// CHECK: switch i32 %{{.+}}, label %[[DONE:.+]] [
// CHECK: [[DONE]]:
// CHECK: br label %[[CLEANUP:[^,]+]]
// CHECK: load i32*, i32** % // CHECK: load i32*, i32** %
// CHECK: store i32 1, i32* % // CHECK: store i32 1, i32* %
// CHECK: call i32 @__kmpc_omp_task(% // CHECK: call i32 @__kmpc_omp_task(%
// UNTIEDRT: br label %[[EXIT:[^,]+]]
// UNTIEDRT: call void [[CONSTR:@.+]](%struct.S* [[S1_ADDR]])
// CHECK: call i8* @__kmpc_omp_task_alloc( // CHECK: call i8* @__kmpc_omp_task_alloc(
// CHECK: call i32 @__kmpc_omp_task(% // CHECK: call i32 @__kmpc_omp_task(%
// CHECK: load i32*, i32** % // CHECK: load i32*, i32** %
// CHECK: store i32 2, i32* % // CHECK: store i32 2, i32* %
// CHECK: call i32 @__kmpc_omp_task(% // CHECK: call i32 @__kmpc_omp_task(%
// UNTIEDRT: br label %[[EXIT]]
// CHECK: call i32 @__kmpc_omp_taskyield(% // CHECK: call i32 @__kmpc_omp_taskyield(%
// CHECK: load i32*, i32** % // CHECK: load i32*, i32** %
// CHECK: store i32 3, i32* % // CHECK: store i32 3, i32* %
// CHECK: call i32 @__kmpc_omp_task(% // CHECK: call i32 @__kmpc_omp_task(%
// UNTIEDRT: br label %[[EXIT]]
// s1 = S();
// UNTIEDRT: call void [[CONSTR]](%struct.S* [[TMP:%.+]])
// UNTIEDRT: [[DST:%.+]] = bitcast %struct.S* [[S1_ADDR]] to i8*
// UNTIEDRT: [[SRC:%.+]] = bitcast %struct.S* [[TMP]] to i8*
// UNTIEDRT: call void @llvm.memcpy.{{.+}}(i8* {{.*}}[[DST]], i8* {{.*}}[[SRC]], i64 4, i1 false)
// UNTIEDRT: call void [[DESTR:@.+]](%struct.S* [[TMP]])
// CHECK: call i32 @__kmpc_omp_taskwait(% // CHECK: call i32 @__kmpc_omp_taskwait(%
// CHECK: load i32*, i32** % // CHECK: load i32*, i32** %
// CHECK: store i32 4, i32* % // CHECK: store i32 4, i32* %
// CHECK: call i32 @__kmpc_omp_task(% // CHECK: call i32 @__kmpc_omp_task(%
// UNTIEDRT: br label %[[EXIT]]
// UNTIEDRT: call void [[DESTR]](%struct.S* [[S1_ADDR]])
// CHECK: br label %[[CLEANUP]]
// CHECK: [[CLEANUP]]:
// UNTIEDRT: br label %[[EXIT]]
// UNTIEDRT: [[EXIT]]:
// UNTIEDRT-NEXT: ret i32 0
struct S1 { struct S1 {
int a; int a;