[OPENMP] Add support for allocate vars in untied tasks.

Local variables marked with #pragma omp allocate must be allocated by a call
to the runtime function and cannot be allocated like other local variables.
Instead, we allocate space for a pointer in the task's private record and
store the address returned by the kmpc_alloc call in that pointer.
So, for an untied task like

```
#pragma omp task untied
{
  S s;
  #pragma omp allocate(s) allocator(allocator)
  s = x;
}
```
the compiler generates something like this:
```
struct task_with_privates {
  S *s;
};

void entry(task_with_privates *p) {
  S *s = p->s;
  switch(partid) {
  case 1:
    p->s = (S*)kmpc_alloc();
    kmpc_omp_task();
    br exit;
  case 2:
    *s = x;
    kmpc_omp_task();
    br exit;
  case 3:
    ~S(s);
    kmpc_free((void*)s);
    br exit;
  }
exit:
}
```
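
For reference, the switch above talks to the libomp allocation entry points. This is only a sketch of their assumed shape, based on the calling convention visible in the test checks below (the allocator handle is passed as a pointer-sized value, e.g. inttoptr 7 for omp_pteam_mem_alloc); the authoritative declarations live in the OpenMP runtime:
```
// Sketch only: assumed shape of the runtime entry points called by the
// generated code. gtid is the global thread id; the allocator handle is
// passed as a pointer-sized value (e.g. inttoptr (i64 7 to i8*) for
// omp_pteam_mem_alloc in the tests).
#include <cstddef>

extern "C" void *__kmpc_alloc(int gtid, std::size_t sz, void *allocator);
extern "C" void __kmpc_free(int gtid, void *ptr, void *allocator);
```
In the pseudocode above, case 1 would thus call kmpc_alloc with sizeof(S) and the chosen allocator handle, and case 3 would pass the same handle back to kmpc_free.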

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D86558
Alexey Bataev 2020-09-15 11:21:47 -04:00 committed by Alexey Bataev
parent 3bc3983f22
commit 738bab743b
10 changed files with 208 additions and 95 deletions

@ -1526,6 +1526,7 @@ void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
FunctionUDMMap.erase(I);
}
LastprivateConditionalToTypes.erase(CGF.CurFn);
FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
@ -3382,6 +3383,17 @@ struct PrivateHelpersTy {
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace
static bool isAllocatableDecl(const VarDecl *VD) {
const VarDecl *CVD = VD->getCanonicalDecl();
if (!CVD->hasAttr<OMPAllocateDeclAttr>())
return false;
const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
// Use the default allocation.
return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
!AA->getAllocator());
}
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
if (!Privates.empty()) {
@ -3396,9 +3408,12 @@ createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
QualType Type = VD->getType().getNonReferenceType();
// If the private variable is a local variable with lvalue ref type,
// allocate the pointer instead of the pointee type.
if (Pair.second.isLocalPrivate() &&
VD->getType()->isLValueReferenceType())
Type = C.getPointerType(Type);
if (Pair.second.isLocalPrivate()) {
if (VD->getType()->isLValueReferenceType())
Type = C.getPointerType(Type);
if (isAllocatableDecl(VD))
Type = C.getPointerType(Type);
}
FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
if (VD->hasAttrs()) {
for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
@ -3700,6 +3715,8 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
QualType Ty = VD->getType().getNonReferenceType();
if (VD->getType()->isLValueReferenceType())
Ty = C.getPointerType(Ty);
if (isAllocatableDecl(VD))
Ty = C.getPointerType(Ty);
Args.push_back(ImplicitParamDecl::Create(
C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
@ -3780,8 +3797,10 @@ static void emitPrivatesInit(CodeGenFunction &CGF,
FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
for (const PrivateDataTy &Pair : Privates) {
// Do not initialize private locals.
if (Pair.second.isLocalPrivate())
if (Pair.second.isLocalPrivate()) {
++FI;
continue;
}
const VarDecl *VD = Pair.second.PrivateCopy;
const Expr *Init = VD->getAnyInitializer();
if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
@ -4146,8 +4165,12 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
/*PrivateElemInit=*/nullptr));
++I;
}
for (const VarDecl *VD : Data.PrivateLocals)
Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
for (const VarDecl *VD : Data.PrivateLocals) {
if (isAllocatableDecl(VD))
Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
else
Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
}
llvm::stable_sort(Privates,
[](const PrivateDataTy &L, const PrivateDataTy &R) {
return L.first > R.first;
@ -11225,44 +11248,27 @@ Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
return CGF.GetAddrOfLocalVar(NativeParam);
}
namespace {
/// Cleanup action for allocate support.
class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
public:
static const int CleanupArgs = 3;
private:
llvm::FunctionCallee RTLFn;
llvm::Value *Args[CleanupArgs];
public:
OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
ArrayRef<llvm::Value *> CallArgs)
: RTLFn(RTLFn) {
assert(CallArgs.size() == CleanupArgs &&
"Size of arguments does not match.");
std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
}
void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
if (!CGF.HaveInsertPoint())
return;
CGF.EmitRuntimeCall(RTLFn, Args);
}
};
} // namespace
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
const VarDecl *VD) {
if (!VD)
return Address::invalid();
Address UntiedAddr = Address::invalid();
Address UntiedRealAddr = Address::invalid();
auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
if (It != FunctionToUntiedTaskStackMap.end()) {
const UntiedLocalVarsAddressesMap &UntiedData =
UntiedLocalVarsStack[It->second];
auto I = UntiedData.find(VD);
if (I != UntiedData.end()) {
UntiedAddr = I->second.first;
UntiedRealAddr = I->second.second;
}
}
const VarDecl *CVD = VD->getCanonicalDecl();
if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
// Use the default allocation.
if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
!AA->getAllocator())
return Address::invalid();
if (!isAllocatableDecl(VD))
return UntiedAddr;
llvm::Value *Size;
CharUnits Align = CGM.getContext().getDeclAlign(CVD);
if (CVD->getType()->isVariablyModifiedType()) {
@ -11277,43 +11283,80 @@ Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
Size = CGM.getSize(Sz.alignTo(Align));
}
llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
assert(AA->getAllocator() &&
"Expected allocator expression for non-default allocator.");
llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
// According to the standard, the original allocator type is an enum
// (integer). Convert to pointer type, if required.
if (Allocator->getType()->isIntegerTy())
Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
else if (Allocator->getType()->isPointerTy())
Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
Allocator, CGM.VoidPtrTy);
Allocator = CGF.EmitScalarConversion(
Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
AA->getAllocator()->getExprLoc());
llvm::Value *Args[] = {ThreadID, Size, Allocator};
llvm::Value *Addr =
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_alloc),
Args, getName({CVD->getName(), ".void.addr"}));
llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
Allocator};
llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_free);
CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
llvm::makeArrayRef(FiniArgs));
QualType Ty = CGM.getContext().getPointerType(CVD->getType());
Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
Addr,
CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
getName({CVD->getName(), ".addr"}));
return Address(Addr, Align);
}
if (UntiedLocalVarsStack.empty())
return Address::invalid();
const UntiedLocalVarsAddressesMap &UntiedData = UntiedLocalVarsStack.back();
auto It = UntiedData.find(VD);
if (It == UntiedData.end())
return Address::invalid();
Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
if (UntiedAddr.isValid())
CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
return It->second;
// Cleanup action for allocate support.
class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
llvm::FunctionCallee RTLFn;
unsigned LocEncoding;
Address Addr;
const Expr *Allocator;
public:
OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, unsigned LocEncoding,
Address Addr, const Expr *Allocator)
: RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
Allocator(Allocator) {}
void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
if (!CGF.HaveInsertPoint())
return;
llvm::Value *Args[3];
Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
CGF, SourceLocation::getFromRawEncoding(LocEncoding));
Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
Addr.getPointer(), CGF.VoidPtrTy);
llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
// According to the standard, the original allocator type is an enum
// (integer). Convert to pointer type, if required.
AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
CGF.getContext().VoidPtrTy,
Allocator->getExprLoc());
Args[2] = AllocVal;
CGF.EmitRuntimeCall(RTLFn, Args);
}
};
Address VDAddr =
UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
VDAddr, AA->getAllocator());
if (UntiedRealAddr.isValid())
if (auto *Region =
dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
Region->emitUntiedSwitch(CGF);
return VDAddr;
}
return UntiedAddr;
}
bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
const VarDecl *VD) const {
auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
if (It == FunctionToUntiedTaskStackMap.end())
return false;
return UntiedLocalVarsStack[It->second].count(VD) > 0;
}
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
@ -11349,11 +11392,14 @@ CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
}
CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
CodeGenModule &CGM,
const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, Address> &LocalVars)
: CGM(CGM), NeedToPush(!LocalVars.empty()) {
CodeGenFunction &CGF,
const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>,
std::pair<Address, Address>> &LocalVars)
: CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
if (!NeedToPush)
return;
CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
}

@ -253,9 +253,9 @@ public:
public:
UntiedTaskLocalDeclsRAII(
CodeGenModule &CGM,
const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, Address>
&LocalVars);
CodeGenFunction &CGF,
const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>,
std::pair<Address, Address>> &LocalVars);
~UntiedTaskLocalDeclsRAII();
};
@ -432,6 +432,8 @@ private:
std::tuple<QualType, const FieldDecl *,
const FieldDecl *, LValue>>>
LastprivateConditionalToTypes;
/// Maps function to the position of the untied task locals stack.
llvm::DenseMap<llvm::Function *, unsigned> FunctionToUntiedTaskStackMap;
/// Type kmp_critical_name, originally defined as typedef kmp_int32
/// kmp_critical_name[8];
llvm::ArrayType *KmpCriticalNameTy;
@ -720,7 +722,8 @@ private:
llvm::SmallVector<NontemporalDeclsSet, 4> NontemporalDeclsStack;
using UntiedLocalVarsAddressesMap =
llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, Address>;
llvm::DenseMap<CanonicalDeclPtr<const VarDecl>,
std::pair<Address, Address>>;
llvm::SmallVector<UntiedLocalVarsAddressesMap, 4> UntiedLocalVarsStack;
/// Stack for list of addresses of declarations in current context marked as
@ -1882,6 +1885,9 @@ public:
/// Destroys user defined allocators specified in the uses_allocators clause.
void emitUsesAllocatorsFini(CodeGenFunction &CGF, const Expr *Allocator);
/// Returns true if the variable is a local variable in untied task.
bool isLocalVarInUntiedTask(CodeGenFunction &CGF, const VarDecl *VD) const;
};
/// Class supports emission of SIMD-only code.

@ -1563,6 +1563,17 @@ static void emitCommonOMPParallelDirective(
CapturedVars, IfCond);
}
static bool isAllocatableDecl(const VarDecl *VD) {
const VarDecl *CVD = VD->getCanonicalDecl();
if (!CVD->hasAttr<OMPAllocateDeclAttr>())
return false;
const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
// Use the default allocation.
return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
!AA->getAllocator());
}
static void emitEmptyBoundParameters(CodeGenFunction &,
const OMPExecutableDirective &,
llvm::SmallVectorImpl<llvm::Value *> &) {}
@ -1575,12 +1586,7 @@ Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
if (!VD)
return Address::invalid();
const VarDecl *CVD = VD->getCanonicalDecl();
if (!CVD->hasAttr<OMPAllocateDeclAttr>())
return Address::invalid();
const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
// Use the default allocation.
if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
!AA->getAllocator())
if (!isAllocatableDecl(CVD))
return Address::invalid();
llvm::Value *Size;
CharUnits Align = CGM.getContext().getDeclAlign(CVD);
@ -1596,6 +1602,7 @@ Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
Size = CGM.getSize(Sz.alignTo(Align));
}
const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
assert(AA->getAllocator() &&
"Expected allocator expression for non-default allocator.");
llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
@ -3931,7 +3938,8 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
CapturedRegion](CodeGenFunction &CGF,
PrePostActionTy &Action) {
llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, Address> UntiedLocalVars;
llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, std::pair<Address, Address>>
UntiedLocalVars;
// Set proper addresses for generated private copies.
OMPPrivateScope Scope(CGF);
llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs;
@ -3976,9 +3984,11 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
QualType Ty = VD->getType().getNonReferenceType();
if (VD->getType()->isLValueReferenceType())
Ty = CGF.getContext().getPointerType(Ty);
if (isAllocatableDecl(VD))
Ty = CGF.getContext().getPointerType(Ty);
Address PrivatePtr = CGF.CreateMemTemp(
CGF.getContext().getPointerType(Ty), ".local.ptr.addr");
UntiedLocalVars.try_emplace(VD, PrivatePtr);
UntiedLocalVars.try_emplace(VD, PrivatePtr, Address::invalid());
CallArgs.push_back(PrivatePtr.getPointer());
}
CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
@ -4002,9 +4012,18 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
// Adjust mapping for internal locals by mapping actual memory instead of
// a pointer to this memory.
for (auto &Pair : UntiedLocalVars) {
Address Replacement(CGF.Builder.CreateLoad(Pair.second),
CGF.getContext().getDeclAlign(Pair.first));
Pair.getSecond() = Replacement;
if (isAllocatableDecl(Pair.first)) {
llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
Address Replacement(Ptr, CGF.getPointerAlign());
Pair.getSecond().first = Replacement;
Ptr = CGF.Builder.CreateLoad(Replacement);
Replacement = Address(Ptr, CGF.getContext().getDeclAlign(Pair.first));
Pair.getSecond().second = Replacement;
} else {
llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
Address Replacement(Ptr, CGF.getContext().getDeclAlign(Pair.first));
Pair.getSecond().first = Replacement;
}
}
}
if (Data.Reductions) {
@ -4100,7 +4119,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
}
(void)InRedScope.Privatize();
CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF.CGM,
CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF,
UntiedLocalVars);
Action.Enter(CGF);
BodyGen(CGF);

@ -85,6 +85,7 @@ int main () {
// CHECK-NOT: {{__kmpc_alloc|__kmpc_free}}
// CHECK: store i32 %{{.+}}, i32* [[V_ADDR]],
// CHECK-NEXT: [[V_VAL:%.+]] = load i32, i32* [[V_ADDR]],
// CHECK-NEXT: [[V_VOID_ADDR:%.+]] = bitcast i32* [[V_ADDR]] to i8*
// CHECK-NEXT: call void @__kmpc_free(i32 [[GTID]], i8* [[V_VOID_ADDR]], i8* inttoptr (i64 6 to i8*))
// CHECK-NOT: {{__kmpc_alloc|__kmpc_free}}
// CHECK: ret i32 [[V_VAL]]
@ -101,7 +102,9 @@ void bar(int a, float &z) {
// CHECK: [[Z_ADDR:%.+]] = bitcast i8* [[Z_VOID_PTR]] to float**
// CHECK: store float* %{{.+}}, float** [[Z_ADDR]],
#pragma omp allocate(a,z) allocator(omp_default_mem_alloc)
// CHECK-NEXT: [[Z_VOID_PTR:%.+]] = bitcast float** [[Z_ADDR]] to i8*
// CHECK: call void @__kmpc_free(i32 [[GTID]], i8* [[Z_VOID_PTR]], i8* inttoptr (i64 1 to i8*))
// CHECK-NEXT: [[A_VOID_PTR:%.+]] = bitcast i32* [[A_ADDR]] to i8*
// CHECK: call void @__kmpc_free(i32 [[GTID]], i8* [[A_VOID_PTR]], i8* inttoptr (i64 1 to i8*))
// CHECK: ret void
}

@ -654,7 +654,8 @@ int main() {
// CHECK-NEXT: br label %[[LAST_DONE]]
// CHECK: [[LAST_DONE]]
// CHECK: call void @__kmpc_free(i32 [[GTID]], i8* [[F_VOID_PTR]], i8* inttoptr (i64 3 to i8*))
// CHECK: [[F_VOID_PTR:%.+]] = bitcast float* [[F_PRIV]] to i8*
// CHECK-NEXT: call void @__kmpc_free(i32 [[GTID]], i8* [[F_VOID_PTR]], i8* inttoptr (i64 3 to i8*))
// CHECK-NEXT: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
// CHECK-NEXT: ret void

@ -414,6 +414,7 @@ int main() {
// CHECK: [[ADD:%.+]] = add nsw i64 [[LVAR_VAL]], 3
// CHECK: store i64 [[ADD]], i64* [[LVAR_PRIV]],
// CHECK: call void @__kmpc_for_static_fini(%{{.+}}* @{{.+}}, i32 %{{.+}})
// CHECK: [[LVAR_VOID_PTR:%.+]] = bitcast i64* [[LVAR_PRIV]] to i8*
// CHECK: call void @__kmpc_free(i32 [[GTID]], i8* [[LVAR_VOID_PTR]], i8* inttoptr (i64 5 to i8*))
// CHECK: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
// CHECK: ret void

@ -876,6 +876,7 @@ int main() {
// CHECK: getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* %{{.+}}, i64 4
// CHECK: store [4 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]], [4 x [[S_FLOAT_TY]]]** %
// CHECK: [[VAR3_VOID_PTR:%.+]] = bitcast [4 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]] to i8*
// CHECK: call void @__kmpc_free(i32 [[GTID]], i8* [[VAR3_VOID_PTR]], i8* inttoptr (i64 6 to i8*))
// CHECK: ret void

@ -423,6 +423,7 @@ int main() {
// CHECK-64: [[T_VAR_VAL:%.+]] = load i32, i32* [[BC]],
// CHECK: store i32 [[T_VAR_VAL]], i32* [[T_VAR_PRIV]],
// CHECK: store i32 0, i32* [[T_VAR_PRIV]],
// CHECK: [[T_VAR_VOID_PTR:%.+]] = bitcast i32* [[T_VAR_PRIV]] to i8*
// CHECK: call void @__kmpc_free(i32 [[GTID]], i8* [[T_VAR_VOID_PTR]], i8* inttoptr ([[iz]] 1 to i8*))
// CHECK: ret void
@ -584,6 +585,7 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) {
// ARRAY: [[SIZE:%.+]] = mul nuw i64 %{{.+}}, 8
// ARRAY: [[BC:%.+]] = bitcast double* [[VLA2_PTR]] to i8*
// ARRAY: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[BC]], i8* align 128 %{{.+}}, i64 [[SIZE]], i1 false)
// ARRAY: [[VLA2_VOID_PTR:%.+]] = bitcast double* [[VLA2_PTR]] to i8*
// ARRAY: call void @__kmpc_free(i32 [[GTID]], i8* [[VLA2_VOID_PTR]], i8* inttoptr (i64 8 to i8*))
// ARRAY-NEXT: ret void
#endif

@ -361,12 +361,13 @@ int main() {
// CHECK: [[GTID_ADDR:%.+]] = load i32*, i32** [[GTID_ADDR_PTR]],
// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_ADDR]],
// CHECK: [[A_VOID_PTR:%.+]] = call i8* @__kmpc_alloc(i32 [[GTID]], i64 4, i8* inttoptr (i64 2 to i8*))
// CHECK: [[A_PRIV:%.+]] = bitcast i8* [[A_VOID_PTR]] to i32*
// CHECK: store i{{[0-9]+}}* [[A_PRIV]], i{{[0-9]+}}** [[REF:%.+]],
// CHECK: [[A_PRIV_ADDR:%.+]] = bitcast i8* [[A_VOID_PTR]] to i32*
// CHECK: store i{{[0-9]+}}* [[A_PRIV_ADDR]], i{{[0-9]+}}** [[REF:%.+]],
// CHECK-NEXT: [[A_PRIV:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[REF]],
// CHECK-NEXT: [[A_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A_PRIV]],
// CHECK-NEXT: [[INC:%.+]] = add nsw i{{[0-9]+}} [[A_VAL]], 1
// CHECK-NEXT: store i{{[0-9]+}} [[INC]], i{{[0-9]+}}* [[A_PRIV]],
// CHECK-NEXT: [[A_VOID_PTR:%.+]] = bitcast i32* [[A_PRIV_ADDR]] to i8*
// CHECK-NEXT: call void @__kmpc_free(i32 [[GTID]], i8* [[A_VOID_PTR]], i8* inttoptr (i64 2 to i8*))
// CHECK-NEXT: ret void

@ -1,6 +1,6 @@
// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix UNTIEDRT
// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - -DUNTIEDRT | FileCheck %s --check-prefix CHECK --check-prefix UNTIEDRT
// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s -DUNTIEDRT
// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix UNTIEDRT
//
// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fopenmp-enable-irbuilder -x c++ -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
@ -14,6 +14,19 @@
#ifndef HEADER
#define HEADER
enum omp_allocator_handle_t {
omp_null_allocator = 0,
omp_default_mem_alloc = 1,
omp_large_cap_mem_alloc = 2,
omp_const_mem_alloc = 3,
omp_high_bw_mem_alloc = 4,
omp_low_lat_mem_alloc = 5,
omp_cgroup_mem_alloc = 6,
omp_pteam_mem_alloc = 7,
omp_thread_mem_alloc = 8,
KMP_ALLOCATOR_MAX_HANDLE = __UINTPTR_MAX__
};
// CHECK-DAG: [[IDENT_T:%.+]] = type { i32, i32, i32, i32, i8* }
// CHECK-DAG: [[STRUCT_SHAREDS:%.+]] = type { i8*, [2 x [[STRUCT_S:%.+]]]* }
// CHECK-DAG: [[STRUCT_SHAREDS1:%.+]] = type { [2 x [[STRUCT_S:%.+]]]* }
@ -258,21 +271,26 @@ int main() {
a = 4;
c = 5;
}
// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 0, i64 48, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY6:@.+]] to i32 (i32, i8*)*))
// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 0, i64 256, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY6:@.+]] to i32 (i32, i8*)*))
// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i8* [[ORIG_TASK_PTR]])
#pragma omp task untied
#pragma omp task untied firstprivate(c) allocate(omp_pteam_mem_alloc:c)
{
S s1;
S s1, s2;
#ifdef UNTIEDRT
#pragma omp allocate(s2) allocator(omp_pteam_mem_alloc)
#endif
s2.a = 0;
#pragma omp task
a = 4;
a = c = 4;
#pragma omp taskyield
s1 = S();
s2.a = 10;
#pragma omp taskwait
}
return a;
}
// CHECK: define internal i32 [[TASK_ENTRY1]](i32 %0, [[KMP_TASK_T]]{{.*}}* noalias %1)
// CHECK: store i32 15, i32* [[A_PTR:@.+]]
// CHECK: store i32 15, i32* [[A_PTR:@.+]],
// CHECK: [[A_VAL:%.+]] = load i32, i32* [[A_PTR]]
// CHECK: [[A_VAL_I8:%.+]] = trunc i32 [[A_VAL]] to i8
// CHECK: store i8 [[A_VAL_I8]], i8* %{{.+}}
@ -294,10 +312,13 @@ int main() {
// CHECK: define internal i32
// CHECK: store i32 4, i32* [[A_PTR]]
// CHECK: define internal i32 [[TASK_ENTRY6]](i32 %0, [[KMP_TASK_T]]{{.*}}* noalias %1)
// CHECK: define internal i32 [[TASK_ENTRY6]](i32 %0, [[KMP_TASK_T]]{{.*}}* noalias %{{.+}})
// UNTIEDRT: [[S1_ADDR_PTR:%.+]] = alloca %struct.S*,
// UNTIEDRT: call void (i8*, ...) %{{.+}}(i8* %{{.+}}, %struct.S** [[S1_ADDR_PTR]])
// UNTIEDRT: [[S1_ADDR:%.+]] = load %struct.S*, %struct.S** [[S1_ADDR_PTR]],
// UNTIEDRT: [[S2_ADDR_PTR_REF:%.+]] = alloca %struct.S**,
// UNTIEDRT: call void (i8*, ...) %{{.+}}(i8* %{{.+}}, %struct.S** [[S1_ADDR_PTR]], %struct.S*** [[S2_ADDR_PTR_REF]])
// UNTIEDRT-DAG: [[S1_ADDR:%.+]] = load %struct.S*, %struct.S** [[S1_ADDR_PTR]],
// UNTIEDRT-DAG: [[S2_ADDR_PTR:%.+]] = load %struct.S**, %struct.S*** [[S2_ADDR_PTR_REF]],
// UNTIEDRT-DAG: [[S2_ADDR:%.+]] = load %struct.S*, %struct.S** [[S2_ADDR_PTR]],
// CHECK: switch i32 %{{.+}}, label %[[DONE:.+]] [
// CHECK: [[DONE]]:
@ -309,16 +330,25 @@ int main() {
// UNTIEDRT: br label %[[EXIT:[^,]+]]
// UNTIEDRT: call void [[CONSTR:@.+]](%struct.S* [[S1_ADDR]])
// UNTIEDRT: [[S2_VOID_PTR:%.+]] = call i8* @__kmpc_alloc(i32 %{{.+}}, i64 4, i8* inttoptr (i64 7 to i8*))
// UNTIEDRT: [[S2_PTR:%.+]] = bitcast i8* [[S2_VOID_PTR]] to %struct.S*
// UNTIEDRT: store %struct.S* [[S2_PTR]], %struct.S** [[S2_ADDR_PTR]],
// UNTIEDRT: load i32*, i32** %
// UNTIEDRT: store i32 2, i32* %
// UNTIEDRT: call i32 @__kmpc_omp_task(%
// UNTIEDRT: br label %[[EXIT]]
// UNTIEDRT: call void [[CONSTR]](%struct.S* [[S2_ADDR]])
// CHECK: call i8* @__kmpc_omp_task_alloc(
// CHECK: call i32 @__kmpc_omp_task(%
// CHECK: load i32*, i32** %
// CHECK: store i32 2, i32* %
// CHECK: store i32 {{2|3}}, i32* %
// CHECK: call i32 @__kmpc_omp_task(%
// UNTIEDRT: br label %[[EXIT]]
// CHECK: call i32 @__kmpc_omp_taskyield(%
// CHECK: load i32*, i32** %
// CHECK: store i32 3, i32* %
// CHECK: store i32 {{3|4}}, i32* %
// CHECK: call i32 @__kmpc_omp_task(%
// UNTIEDRT: br label %[[EXIT]]
@ -331,10 +361,13 @@ int main() {
// CHECK: call i32 @__kmpc_omp_taskwait(%
// CHECK: load i32*, i32** %
// CHECK: store i32 4, i32* %
// CHECK: store i32 {{4|5}}, i32* %
// CHECK: call i32 @__kmpc_omp_task(%
// UNTIEDRT: br label %[[EXIT]]
// UNTIEDRT: call void [[DESTR]](%struct.S* [[S2_ADDR]])
// UNTIEDRT: [[S2_VOID_PTR:%.+]] = bitcast %struct.S* [[S2_ADDR]] to i8*
// UNTIEDRT: call void @__kmpc_free(i32 %{{.+}}, i8* [[S2_VOID_PTR]], i8* inttoptr (i64 7 to i8*))
// UNTIEDRT: call void [[DESTR]](%struct.S* [[S1_ADDR]])
// CHECK: br label %[[CLEANUP]]