[OPENMP50]Codegen for 'depend' clause in depobj directive.

Added codegen for 'depend' clause in depobj directive. The depend clause
is emitted as kmp_depend_info <deps>[<number_of_items_in_clause> + 1]. The
first element in this array is reserved for storing the number of
elements in this array: <deps>[0].base_addr =
<number_of_items_in_clause>;

This extra element is required to implement 'update' and 'destroy'
clauses. It is required to know the size of array to destroy it
correctly and to update depency kind.
This commit is contained in:
Alexey Bataev 2020-03-04 14:37:51 -05:00
parent 3712edb152
commit e46f0fee30
4 changed files with 187 additions and 33 deletions

View File

@ -29,6 +29,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
@ -5185,29 +5186,21 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
return Result;
}
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
const OMPExecutableDirective &D,
llvm::Function *TaskFunction,
QualType SharedsTy, Address Shareds,
const Expr *IfCond,
const OMPTaskDataTy &Data) {
if (!CGF.HaveInsertPoint())
return;
TaskResultTy Result =
emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
llvm::Value *NewTask = Result.NewTask;
llvm::Function *TaskEntry = Result.TaskEntry;
llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
LValue TDBase = Result.TDBase;
const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
Address CGOpenMPRuntime::emitDependClause(
CodeGenFunction &CGF,
ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependencies,
bool ForDepobj, SourceLocation Loc) {
// Process list of dependencies.
ASTContext &C = CGM.getContext();
// Process list of dependences.
Address DependenciesArray = Address::invalid();
unsigned NumDependencies = Data.Dependences.size();
unsigned NumDependencies = Dependencies.size();
if (NumDependencies) {
// Dependence kind for RTL.
enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 };
enum RTLDependenceKindTy {
DepIn = 0x01,
DepInOut = 0x3,
DepMutexInOutSet = 0x4
};
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
RecordDecl *KmpDependInfoRD;
QualType FlagsTy =
@ -5224,15 +5217,47 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
} else {
KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
}
// Define type kmp_depend_info[<Dependences.size()>];
// Define type kmp_depend_info[<Dependencies.size()>];
// For depobj reserve one extra element to store the number of elements.
// It is required to handle depobj(x) update(in) construct.
QualType KmpDependInfoArrayTy = C.getConstantArrayType(
KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
KmpDependInfoTy,
llvm::APInt(/*numBits=*/64, NumDependencies + (ForDepobj ? 1 : 0)),
nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
// kmp_depend_info[<Dependences.size()>] deps;
DependenciesArray =
CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
// kmp_depend_info[<Dependencies.size()>] deps;
if (ForDepobj) {
// Need to allocate on the dynamic memory.
llvm::Value *ThreadID = getThreadID(CGF, Loc);
// Use default allocator.
llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
CharUnits Align = C.getTypeAlignInChars(KmpDependInfoArrayTy);
CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
llvm::Value *Size = CGF.CGM.getSize(Sz.alignTo(Align));
llvm::Value *Args[] = {ThreadID, Size, Allocator};
llvm::Value *Addr = CGF.EmitRuntimeCall(
createRuntimeFunction(OMPRTL__kmpc_alloc), Args, ".dep.arr.addr");
Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
Addr, CGF.ConvertTypeForMem(KmpDependInfoArrayTy)->getPointerTo());
DependenciesArray = Address(Addr, Align);
} else {
DependenciesArray =
CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
}
if (ForDepobj) {
// Write number of elements in the first element of array for depobj.
llvm::Value *NumVal =
llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
LValue Base = CGF.MakeAddrLValue(
CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0),
KmpDependInfoTy);
// deps[i].base_addr = NumDependencies;
LValue BaseAddrLVal = CGF.EmitLValueForField(
Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
CGF.EmitStoreOfScalar(NumVal, BaseAddrLVal);
}
for (unsigned I = 0; I < NumDependencies; ++I) {
const Expr *E = Data.Dependences[I].second;
const Expr *E = Dependencies[I].second;
LValue Addr = CGF.EmitLValue(E);
llvm::Value *Size;
QualType Ty = E->getType();
@ -5249,22 +5274,23 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
} else {
Size = CGF.getTypeSize(Ty);
}
LValue Base = CGF.MakeAddrLValue(
CGF.Builder.CreateConstArrayGEP(DependenciesArray, I),
KmpDependInfoTy);
// deps[i].base_addr = &<Dependences[i].second>;
LValue Base =
CGF.MakeAddrLValue(CGF.Builder.CreateConstArrayGEP(
DependenciesArray, I + (ForDepobj ? 1 : 0)),
KmpDependInfoTy);
// deps[i].base_addr = &<Dependencies[i].second>;
LValue BaseAddrLVal = CGF.EmitLValueForField(
Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
CGF.EmitStoreOfScalar(
CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGF.IntPtrTy),
BaseAddrLVal);
// deps[i].len = sizeof(<Dependences[i].second>);
// deps[i].len = sizeof(<Dependencies[i].second>);
LValue LenLVal = CGF.EmitLValueForField(
Base, *std::next(KmpDependInfoRD->field_begin(), Len));
CGF.EmitStoreOfScalar(Size, LenLVal);
// deps[i].flags = <Dependences[i].first>;
// deps[i].flags = <Dependencies[i].first>;
RTLDependenceKindTy DepKind;
switch (Data.Dependences[I].first) {
switch (Dependencies[I].first) {
case OMPC_DEPEND_in:
DepKind = DepIn;
break;
@ -5289,6 +5315,29 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy);
}
return DependenciesArray;
}
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
const OMPExecutableDirective &D,
llvm::Function *TaskFunction,
QualType SharedsTy, Address Shareds,
const Expr *IfCond,
const OMPTaskDataTy &Data) {
if (!CGF.HaveInsertPoint())
return;
TaskResultTy Result =
emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
llvm::Value *NewTask = Result.NewTask;
llvm::Function *TaskEntry = Result.TaskEntry;
llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
LValue TDBase = Result.TDBase;
const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
// Process list of dependences.
Address DependenciesArray =
emitDependClause(CGF, Data.Dependences, /*ForDepobj=*/false, Loc);
unsigned NumDependencies = Data.Dependences.size();
// NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
// libcall.

View File

@ -1776,6 +1776,16 @@ public:
LValue PrivLVal,
const VarDecl *VD,
SourceLocation Loc);
/// Emits list of dependecies based on the provided data (array of
/// dependence/expression pairs).
/// \param ForDepobj true if the memory for depencies is alloacted for depobj
/// directive. In this case, the variable is allocated in dynamically.
/// \returns Pointer to the first element of the array casted to VoidPtr type.
Address emitDependClause(
CodeGenFunction &CGF,
ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependencies,
bool ForDepobj, SourceLocation Loc);
};
/// Class supports emissionof SIMD-only code.

View File

@ -3800,7 +3800,19 @@ void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
S.getBeginLoc(), AO);
}
void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {}
void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {
const auto *DO = S.getSingleClause<OMPDepobjClause>();
LValue DOLVal = EmitLValue(DO->getDepobj());
if (const auto *DC = S.getSingleClause<OMPDependClause>()) {
SmallVector<std::pair<OpenMPDependClauseKind, const Expr *>, 4>
Dependencies;
for (const Expr *IRef : DC->varlists())
Dependencies.emplace_back(DC->getDependencyKind(), IRef);
Address DepAddr = CGM.getOpenMPRuntime().emitDependClause(
*this, Dependencies, /*ForDepobj=*/true, DC->getBeginLoc());
EmitStoreOfScalar(DepAddr.getPointer(), DOLVal);
}
}
void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
const CodeGenLoopTy &CodeGenLoop,

View File

@ -0,0 +1,83 @@
// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fopenmp-version=50 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fopenmp -triple x86_64-apple-darwin10 -fopenmp-version=50 -x c++ -std=c++11 -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -triple x86_64-apple-darwin10 -fopenmp-version=50 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -fopenmp-version=50 -emit-llvm -o - %s | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -triple x86_64-apple-darwin10 -fopenmp-version=50 -x c++ -std=c++11 -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -triple x86_64-apple-darwin10 -fopenmp-version=50 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
// CHECK-DAG: [[MAIN_A:@.+]] = internal global i8* null,
// CHECK-DAG: [[TMAIN_A:@.+]] = linkonce_odr global i8* null,
typedef void *omp_depend_t;
void foo() {}
template <class T>
T tmain(T argc) {
static T a;
#pragma omp depobj(a) depend(in:argc)
#pragma omp depobj(argc) destroy
#pragma omp depobj(argc) update(inout)
return argc;
}
int main(int argc, char **argv) {
static omp_depend_t a;
omp_depend_t b;
#pragma omp depobj(a) depend(out:argc, argv)
#pragma omp depobj(b) destroy
#pragma omp depobj(b) update(mutexinoutset)
(void)tmain(a), tmain(b);
return 0;
}
// CHECK-LABEL: @main
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(
// CHECK: [[DEP_ADDR_VOID:%.+]] = call i8* @__kmpc_alloc(i32 [[GTID]], i64 72, i8* null)
// CHECK: [[DEP_ADDR:%.+]] = bitcast i8* [[DEP_ADDR_VOID]] to [3 x %struct.kmp_depend_info]*
// CHECK: [[BASE_ADDR:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[DEP_ADDR]], i{{.+}} 0, i{{.+}} 0
// CHECK: [[SZ_BASE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[BASE_ADDR]], i{{.+}} 0, i{{.+}} 0
// CHECK: store i64 2, i64* [[SZ_BASE]],
// CHECK: [[BASE_ADDR:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[DEP_ADDR]], i{{.+}} 0, i{{.+}} 1
// CHECK: [[ADDR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[BASE_ADDR]], i{{.+}} 0, i{{.+}} 0
// CHECK: store i64 %{{.+}}, i64* [[ADDR]],
// CHECK: [[SZ_ADDR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[BASE_ADDR]], i{{.+}} 0, i{{.+}} 1
// CHECK: store i64 4, i64* [[SZ_ADDR]],
// CHECK: [[FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[BASE_ADDR]], i{{.+}} 0, i{{.+}} 2
// CHECK: store i8 3, i8* [[FLAGS_ADDR]],
// CHECK: [[BASE_ADDR:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[DEP_ADDR]], i{{.+}} 0, i{{.+}} 2
// CHECK: [[ADDR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[BASE_ADDR]], i{{.+}} 0, i{{.+}} 0
// CHECK: store i64 %{{.+}}, i64* [[ADDR]],
// CHECK: [[SZ_ADDR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[BASE_ADDR]], i{{.+}} 0, i{{.+}} 1
// CHECK: store i64 8, i64* [[SZ_ADDR]],
// CHECK: [[FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[BASE_ADDR]], i{{.+}} 0, i{{.+}} 2
// CHECK: store i8 3, i8* [[FLAGS_ADDR]],
// CHECK: [[BASE_ADDR:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[DEP_ADDR]], i{{.+}} 0, i{{.+}} 0
// CHECK: [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[BASE_ADDR]] to i8*
// CHECK: store i8* [[DEP]], i8** [[MAIN_A]],
// CHECK-LABEL: tmain
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(
// CHECK: [[DEP_ADDR_VOID:%.+]] = call i8* @__kmpc_alloc(i32 [[GTID]], i64 48, i8* null)
// CHECK: [[DEP_ADDR:%.+]] = bitcast i8* [[DEP_ADDR_VOID]] to [2 x %struct.kmp_depend_info]*
// CHECK: [[BASE_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_depend_info], [2 x %struct.kmp_depend_info]* [[DEP_ADDR]], i{{.+}} 0, i{{.+}} 0
// CHECK: [[SZ_BASE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[BASE_ADDR]], i{{.+}} 0, i{{.+}} 0
// CHECK: store i64 1, i64* [[SZ_BASE]],
// CHECK: [[BASE_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_depend_info], [2 x %struct.kmp_depend_info]* [[DEP_ADDR]], i{{.+}} 0, i{{.+}} 1
// CHECK: [[ADDR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[BASE_ADDR]], i{{.+}} 0, i{{.+}} 0
// CHECK: store i64 %{{.+}}, i64* [[ADDR]],
// CHECK: [[SZ_ADDR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[BASE_ADDR]], i{{.+}} 0, i{{.+}} 1
// CHECK: store i64 8, i64* [[SZ_ADDR]],
// CHECK: [[FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[BASE_ADDR]], i{{.+}} 0, i{{.+}} 2
// CHECK: store i8 1, i8* [[FLAGS_ADDR]],
// CHECK: [[BASE_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_depend_info], [2 x %struct.kmp_depend_info]* [[DEP_ADDR]], i{{.+}} 0, i{{.+}} 0
// CHECK: [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[BASE_ADDR]] to i8*
// CHECK: store i8* [[DEP]], i8** [[TMAIN_A]],
#endif