[OPENMP50]Codegen for depobj dependency kind.

Implemented codegen for depobj modifier in depend clauses.
This commit is contained in:
Alexey Bataev 2020-03-09 17:18:19 -04:00
parent c7b2e7f527
commit 6309334b95
4 changed files with 200 additions and 54 deletions

View File

@ -5236,7 +5236,36 @@ static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
}
}
Address CGOpenMPRuntime::emitDependClause(
/// Loads the dependency array referenced by a depobj variable together with
/// the number of elements stored in it.
///
/// The pointer held in the depobj variable is treated as a pointer to
/// kmp_depend_info elements; the element count is read from the base_addr
/// field of the element located one position before that pointer.
/// \param CGF Code generation function used to emit the loads.
/// \param DepobjLVal LValue of the depobj variable.
/// \param Loc Source location used for the emitted scalar load.
/// \return Pair of the loaded number of elements and an LValue based on the
/// pointer stored in the depobj variable (typed as kmp_depend_info).
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
SourceLocation Loc) {
ASTContext &C = CGM.getContext();
QualType FlagsTy;
getDependTypes(C, KmpDependInfoTy, FlagsTy);
RecordDecl *KmpDependInfoRD =
cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
// Load the pointer stored in the depobj variable.
LValue Base = CGF.EmitLoadOfPointerLValue(
DepobjLVal.getAddress(CGF),
C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
// Reinterpret the loaded pointer as kmp_depend_info *.
QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
Base.getTBAAInfo());
// Step back one element (index -1) to reach the element that keeps the
// number of dependencies.
llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
Addr.getPointer(),
llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
LValue NumDepsBase = CGF.MakeAddrLValue(
Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
Base.getBaseInfo(), Base.getTBAAInfo());
// NumDeps = deps[-1].base_addr;
LValue BaseAddrLVal = CGF.EmitLValueForField(
NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
return std::make_pair(NumDeps, Base);
}
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
CodeGenFunction &CGF,
ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependencies,
bool ForDepobj, SourceLocation Loc) {
@ -5244,21 +5273,42 @@ Address CGOpenMPRuntime::emitDependClause(
ASTContext &C = CGM.getContext();
Address DependenciesArray = Address::invalid();
unsigned NumDependencies = Dependencies.size();
llvm::Value *NumOfElements = nullptr;
if (NumDependencies) {
QualType FlagsTy;
getDependTypes(C, KmpDependInfoTy, FlagsTy);
RecordDecl *KmpDependInfoRD =
cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
unsigned NumDepobjDependecies = 0;
SmallVector<std::pair<llvm::Value *, LValue>, 4> Depobjs;
llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
// Calculate number of depobj dependencies.
for (const std::pair<OpenMPDependClauseKind, const Expr *> &Pair :
Dependencies) {
if (Pair.first != OMPC_DEPEND_depobj)
continue;
LValue DepobjLVal = CGF.EmitLValue(Pair.second);
llvm::Value *NumDeps;
LValue Base;
std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
NumOfDepobjElements =
CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumDeps);
Depobjs.emplace_back(NumDeps, Base);
++NumDepobjDependecies;
}
QualType KmpDependInfoArrayTy;
// Define type kmp_depend_info[<Dependencies.size()>];
// For depobj reserve one extra element to store the number of elements.
// It is required to handle depobj(x) update(in) construct.
QualType KmpDependInfoArrayTy = C.getConstantArrayType(
KmpDependInfoTy,
llvm::APInt(/*numBits=*/64, NumDependencies + (ForDepobj ? 1 : 0)),
nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
// kmp_depend_info[<Dependencies.size()>] deps;
if (ForDepobj) {
assert(NumDepobjDependecies == 0 &&
"depobj dependency kind is not expected in depobj directive.");
KmpDependInfoArrayTy = C.getConstantArrayType(
KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
// Need to allocate on the dynamic memory.
llvm::Value *ThreadID = getThreadID(CGF, Loc);
// Use default allocator.
@ -5273,9 +5323,38 @@ Address CGOpenMPRuntime::emitDependClause(
Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
Addr, CGF.ConvertTypeForMem(KmpDependInfoArrayTy)->getPointerTo());
DependenciesArray = Address(Addr, Align);
NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
/*isSigned=*/false);
} else if (NumDepobjDependecies > 0) {
NumOfElements = CGF.Builder.CreateNUWAdd(
NumOfDepobjElements,
llvm::ConstantInt::get(CGM.IntPtrTy,
NumDependencies - NumDepobjDependecies,
/*isSigned=*/false));
NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
/*isSigned=*/false);
OpaqueValueExpr OVE(
Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
VK_RValue);
CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
RValue::get(NumOfElements));
KmpDependInfoArrayTy =
C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
/*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
// CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
// Properly emit variable-sized array.
auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
ImplicitParamDecl::Other);
CGF.EmitVarDecl(*PD);
DependenciesArray = CGF.GetAddrOfLocalVar(PD);
} else {
KmpDependInfoArrayTy = C.getConstantArrayType(
KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
DependenciesArray =
CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
/*isSigned=*/false);
}
if (ForDepobj) {
// Write number of elements in the first element of array for depobj.
@ -5289,7 +5368,10 @@ Address CGOpenMPRuntime::emitDependClause(
Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
CGF.EmitStoreOfScalar(NumVal, BaseAddrLVal);
}
unsigned Pos = ForDepobj ? 1 : 0;
for (unsigned I = 0; I < NumDependencies; ++I) {
if (Dependencies[I].first == OMPC_DEPEND_depobj)
continue;
const Expr *E = Dependencies[I].second;
LValue Addr = CGF.EmitLValue(E);
llvm::Value *Size;
@ -5307,10 +5389,16 @@ Address CGOpenMPRuntime::emitDependClause(
} else {
Size = CGF.getTypeSize(Ty);
}
LValue Base =
CGF.MakeAddrLValue(CGF.Builder.CreateConstArrayGEP(
DependenciesArray, I + (ForDepobj ? 1 : 0)),
KmpDependInfoTy);
LValue Base;
if (NumDepobjDependecies > 0) {
Base = CGF.MakeAddrLValue(
CGF.Builder.CreateConstGEP(DependenciesArray, Pos),
KmpDependInfoTy);
} else {
Base = CGF.MakeAddrLValue(
CGF.Builder.CreateConstArrayGEP(DependenciesArray, Pos),
KmpDependInfoTy);
}
// deps[i].base_addr = &<Dependencies[i].second>;
LValue BaseAddrLVal = CGF.EmitLValueForField(
Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
@ -5328,12 +5416,30 @@ Address CGOpenMPRuntime::emitDependClause(
Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
FlagsLVal);
++Pos;
}
// Copy final depobj arrays.
if (NumDepobjDependecies > 0) {
llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
Address Addr = CGF.Builder.CreateConstGEP(DependenciesArray, Pos);
for (const std::pair<llvm::Value *, LValue> &Pair : Depobjs) {
llvm::Value *Size = CGF.Builder.CreateNUWMul(ElSize, Pair.first);
CGF.Builder.CreateMemCpy(Addr, Pair.second.getAddress(CGF), Size);
Addr =
Address(CGF.Builder.CreateGEP(
Addr.getElementType(), Addr.getPointer(), Pair.first),
DependenciesArray.getAlignment().alignmentOfArrayElement(
C.getTypeSizeInChars(KmpDependInfoTy)));
}
DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
DependenciesArray, CGF.VoidPtrTy);
} else {
DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
CGF.Builder.CreateConstArrayGEP(DependenciesArray, ForDepobj ? 1 : 0),
CGF.VoidPtrTy);
}
DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
CGF.Builder.CreateConstArrayGEP(DependenciesArray, ForDepobj ? 1 : 0),
CGF.VoidPtrTy);
}
return DependenciesArray;
return std::make_pair(NumOfElements, DependenciesArray);
}
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
@ -5370,24 +5476,9 @@ void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
RecordDecl *KmpDependInfoRD =
cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
LValue Base = CGF.EmitLoadOfPointerLValue(
DepobjLVal.getAddress(CGF),
C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
Base.getTBAAInfo());
llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
Addr.getPointer(),
llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
LValue NumDepsBase = CGF.MakeAddrLValue(
Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
Base.getBaseInfo(), Base.getTBAAInfo());
// NumDeps = deps[i].base_addr;
LValue BaseAddrLVal = CGF.EmitLValueForField(
NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
llvm::Value *NumDeps;
LValue Base;
std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
Address Begin = Base.getAddress(CGF);
// Cast from pointer to array type to pointer to single element.
@ -5439,9 +5530,10 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
LValue TDBase = Result.TDBase;
const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
// Process list of dependences.
Address DependenciesArray =
Address DependenciesArray = Address::invalid();
llvm::Value *NumOfElements;
std::tie(NumOfElements, DependenciesArray) =
emitDependClause(CGF, Data.Dependences, /*ForDepobj=*/false, Loc);
unsigned NumDependencies = Data.Dependences.size();
// NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
// libcall.
@ -5453,24 +5545,23 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
llvm::Value *DepTaskArgs[7];
if (NumDependencies) {
if (!Data.Dependences.empty()) {
DepTaskArgs[0] = UpLoc;
DepTaskArgs[1] = ThreadID;
DepTaskArgs[2] = NewTask;
DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
DepTaskArgs[3] = NumOfElements;
DepTaskArgs[4] = DependenciesArray.getPointer();
DepTaskArgs[5] = CGF.Builder.getInt32(0);
DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
}
auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
&TaskArgs,
auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
&DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
if (!Data.Tied) {
auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
}
if (NumDependencies) {
if (!Data.Dependences.empty()) {
CGF.EmitRuntimeCall(
createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
} else {
@ -5484,16 +5575,16 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
};
llvm::Value *DepWaitTaskArgs[6];
if (NumDependencies) {
if (!Data.Dependences.empty()) {
DepWaitTaskArgs[0] = UpLoc;
DepWaitTaskArgs[1] = ThreadID;
DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
DepWaitTaskArgs[2] = NumOfElements;
DepWaitTaskArgs[3] = DependenciesArray.getPointer();
DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
}
auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
NumDependencies, &DepWaitTaskArgs,
&Data, &DepWaitTaskArgs,
Loc](CodeGenFunction &CGF, PrePostActionTy &) {
CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
CodeGenFunction::RunCleanupsScope LocalScope(CGF);
@ -5501,7 +5592,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
// kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
// ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
// is specified.
if (NumDependencies)
if (!Data.Dependences.empty())
CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
DepWaitTaskArgs);
// Call proxy_task_entry(gtid, new_task);

View File

@ -855,6 +855,14 @@ private:
StringRef UniqueDeclName, LValue LVal,
SourceLocation Loc);
/// Returns the number of the elements and the address of the depobj
/// dependency array.
/// \param CGF Code generation function used to emit the required loads.
/// \param DepobjLVal LValue of the depobj variable to read from.
/// \param Loc Source location used for the emitted load.
/// \return Number of elements in depobj array and the pointer to the array of
/// dependencies.
std::pair<llvm::Value *, LValue> getDepobjElements(CodeGenFunction &CGF,
LValue DepobjLVal,
SourceLocation Loc);
public:
explicit CGOpenMPRuntime(CodeGenModule &CGM)
: CGOpenMPRuntime(CGM, ".", ".") {}
@ -1782,7 +1790,7 @@ public:
/// \param ForDepobj true if the memory for dependencies is allocated for the
/// depobj directive. In this case, the array is allocated dynamically.
/// \returns Pair of the number of elements in the dependency array and the
/// pointer to the array of dependencies, cast to VoidPtr type.
Address emitDependClause(
std::pair<llvm::Value *, Address> emitDependClause(
CodeGenFunction &CGF,
ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependencies,
bool ForDepobj, SourceLocation Loc);

View File

@ -3809,7 +3809,7 @@ void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {
for (const Expr *IRef : DC->varlists())
Dependencies.emplace_back(DC->getDependencyKind(), IRef);
Address DepAddr = CGM.getOpenMPRuntime().emitDependClause(
*this, Dependencies, /*ForDepobj=*/true, DC->getBeginLoc());
*this, Dependencies, /*ForDepobj=*/true, DC->getBeginLoc()).second;
EmitStoreOfScalar(DepAddr.getPointer(), DOLVal);
return;
}

View File

@ -1,23 +1,70 @@
// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fopenmp-version=50 -x c -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c -triple x86_64-apple-darwin10 -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -x c -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -x c -triple x86_64-apple-darwin10 -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -x c -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -fopenmp-version=50 -x c -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-apple-darwin10 -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
typedef void *omp_depend_t;
void foo();
// CHECK-LABEL: @main
int main() {
// CHECK: call i32 @__kmpc_global_thread_num(
// CHECK: call i8* @__kmpc_omp_task_alloc(
// CHECK: call i32 @__kmpc_omp_task(
#pragma omp task
omp_depend_t d, x;
int a;
// CHECK: [[D_ADDR:%.+]] = alloca i8*,
// CHECK: [[X_ADDR:%.+]] = alloca i8*,
// CHECK: [[A_ADDR:%.+]] = alloca i32,
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(
// CHECK: [[ALLOC:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %0, i32 1, i64 40, i64 0, i32 (i32, i8*)* bitcast (i32 (i32, [[PRIVATES_TY:%.+]]*)* [[TASK_ENTRY:@.+]] to i32 (i32, i8*)*))
// CHECK: [[DATA:%.+]] = bitcast i8* [[ALLOC]] to [[PRIVATES_TY]]*
// CHECK: [[D:%.+]] = load i8*, i8** [[D_ADDR]],
// CHECK: [[D_DEP:%.+]] = bitcast i8* [[D]] to %struct.kmp_depend_info*
// CHECK: [[D_DEP_BASE:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[D_DEP]], i{{.+}} -1
// CHECK: [[D_DEP_BASE_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[D_DEP_BASE]], i{{.+}} 0, i{{.+}} 0
// CHECK: [[SIZE1:%.+]] = load i64, i64* [[D_DEP_BASE_SIZE]],
// CHECK: [[SIZE:%.+]] = add nuw i64 0, [[SIZE1]]
// CHECK: [[X:%.+]] = load i8*, i8** [[X_ADDR]],
// CHECK: [[X_DEP:%.+]] = bitcast i8* [[X]] to %struct.kmp_depend_info*
// CHECK: [[X_DEP_BASE:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[X_DEP]], i{{.+}} -1
// CHECK: [[X_DEP_BASE_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[X_DEP_BASE]], i{{.+}} 0, i{{.+}} 0
// CHECK: [[SIZE2:%.+]] = load i64, i64* [[X_DEP_BASE_SIZE]],
// CHECK: [[SIZE3:%.+]] = add nuw i64 [[SIZE]], [[SIZE2]]
// CHECK: [[SIZE:%.+]] = add nuw i64 [[SIZE3]], 1
// CHECK: [[SIZE32:%.+]] = trunc i64 [[SIZE]] to i32
// CHECK: [[SIZE64:%.+]] = zext i32 [[SIZE32]] to i64
// CHECK: [[SV:%.+]] = call i8* @llvm.stacksave()
// CHECK: store i8* [[SV]], i8** [[SV_ADDR:%.+]],
// CHECK: [[VLA:%.+]] = alloca %struct.kmp_depend_info, i64 [[SIZE64]],
// CHECK: [[VLA0:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[VLA]], i64 0
// CHECK: [[BASE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[VLA0]], i{{.+}} 0, i{{.+}} 0
// CHECK: [[A_ADDR_CAST:%.+]] = ptrtoint i32* [[A_ADDR]] to i64
// CHECK: store i64 [[A_ADDR_CAST]], i64* [[BASE_ADDR]],
// CHECK: [[SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[VLA0]], i{{.+}} 0, i{{.+}} 1
// CHECK: store i64 4, i64* [[SIZE_ADDR]],
// CHECK: [[FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[VLA0]], i{{.+}} 0, i{{.+}} 2
// CHECK: store i8 1, i8* [[FLAGS_ADDR]],
// CHECK: [[VLA_D:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[VLA]], i64 1
// CHECK: [[D_SIZE:%.+]] = mul nuw i64 24, [[SIZE1]]
// CHECK: [[DEST:%.+]] = bitcast %struct.kmp_depend_info* [[VLA_D]] to i8*
// CHECK: [[SRC:%.+]] = bitcast %struct.kmp_depend_info* [[D_DEP]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[DEST]], i8* align 8 [[SRC]], i64 [[D_SIZE]], i1 false)
// CHECK: [[VLA_X:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[VLA_D]], i64 [[SIZE1]]
// CHECK: [[X_SIZE:%.+]] = mul nuw i64 24, [[SIZE2]]
// CHECK: [[DEST:%.+]] = bitcast %struct.kmp_depend_info* [[VLA_X]] to i8*
// CHECK: [[SRC:%.+]] = bitcast %struct.kmp_depend_info* [[X_DEP]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[DEST]], i8* align 8 [[SRC]], i64 [[X_SIZE]], i1 false)
// CHECK: [[BC:%.+]] = bitcast %struct.kmp_depend_info* [[VLA]] to i8*
// CHECK: call i32 @__kmpc_omp_task_with_deps(%struct.ident_t* @{{.+}}, i32 [[GTID]], i8* [[ALLOC]], i32 [[SIZE32]], i8* [[BC]], i32 0, i8* null)
// CHECK: [[SV:%.+]] = load i8*, i8** [[SV_ADDR]],
// CHECK: call void @llvm.stackrestore(i8* [[SV]])
#pragma omp task depend(in: a) depend(depobj: d, x)
{
#pragma omp taskgroup
{