[OPENMP50]Initial codegen for 'affinity' clauses.

Summary:
Added initial codegen for 'affinity' clauses on task directives.
Emits the following code:
```
kmp_task_affinity_info_t affs[<num_elems>];

void *td = __kmpc_task_alloc(..);

affs[<i>].base = &data_i;
affs[<i>].size = sizeof(data_i);
__kmpc_omp_reg_task_with_affinity(&loc, <gtid>, td, <num_elems>, affs);
```

The result returned by the call to the `__kmpc_omp_reg_task_with_affinity`
function is currently ignored, since the runtime currently ignores its args
and returns 0 unconditionally.

Reviewers: jdoerfert

Subscribers: yaxunl, guansong, sstefan1, llvm-commits, cfe-commits, caomhin

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D80240
This commit is contained in:
Alexey Bataev 2020-05-19 16:29:36 -04:00
parent d559185aae
commit 89d9dba2c6
4 changed files with 410 additions and 108 deletions

View File

@ -4024,6 +4024,135 @@ checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
return NeedsCleanup; return NeedsCleanup;
} }
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// The constructor emits, for every iterator of the expression, a counter
/// initialization, a condition block ("iter.cont") and the beginning of a
/// body block ("iter.body"), leaving the insertion point inside the innermost
/// body. The destructor emits the counter increments, the back branches and
/// the exit blocks ("iter.exit") in reverse iterator order. Code emitted while
/// an object of this class is alive therefore lands inside the generated loop
/// nest. If the iterator expression is null, nothing is emitted.
class OMPIteratorGeneratorScope final
: public CodeGenFunction::OMPPrivateScope {
CodeGenFunction &CGF;
const OMPIteratorExpr *E = nullptr;
/// Jump destinations of the "iter.cont" (condition) blocks, one per iterator.
SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
/// Jump destinations of the "iter.exit" blocks, one per iterator.
SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
OMPIteratorGeneratorScope() = delete;
OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
public:
/// Opens one loop per iterator of \p E (outermost first). Does nothing when
/// \p E is null.
OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
: CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
if (!E)
return;
// Upper bounds (iteration counts) are evaluated here, before Privatize() is
// called, and reused below as the loop limits.
SmallVector<llvm::Value *, 4> Uppers;
for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
// Register a temporary for the iterator variable itself ...
const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
addPrivate(VD, [&CGF, VD]() {
return CGF.CreateMemTemp(VD->getType(), VD->getName());
});
// ... and one for its helper counter variable.
const OMPIteratorHelperData &HelperData = E->getHelper(I);
addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
"counter.addr");
});
}
Privatize();
// Emit the loop headers, outermost iterator first.
for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
const OMPIteratorHelperData &HelperData = E->getHelper(I);
LValue CLVal =
CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
HelperData.CounterVD->getType());
// Counter = 0;
CGF.EmitStoreOfScalar(
llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
CLVal);
CodeGenFunction::JumpDest &ContDest =
ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
CodeGenFunction::JumpDest &ExitDest =
ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
// N = <number-of_iterations>;
llvm::Value *N = Uppers[I];
// cont:
// if (Counter < N) goto body; else goto exit;
CGF.EmitBlock(ContDest.getBlock());
auto *CVal =
CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
// The comparison signedness follows the type of the counter variable.
llvm::Value *Cmp =
HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
? CGF.Builder.CreateICmpSLT(CVal, N)
: CGF.Builder.CreateICmpULT(CVal, N);
llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
// body:
CGF.EmitBlock(BodyBB);
// Iteri = Begini + Counter * Stepi;
CGF.EmitIgnoredExpr(HelperData.Update);
}
}
/// Closes the loops opened by the constructor, innermost first. Does nothing
/// when the iterator expression is null.
~OMPIteratorGeneratorScope() {
if (!E)
return;
for (unsigned I = E->numOfIterators(); I > 0; --I) {
// Counter = Counter + 1;
const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
// goto cont;
CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
// exit:
// IsFinished is true only for the outermost loop's exit block (I == 1).
CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
}
}
};
} // namespace
/// Emits the start address and the size in bytes of the storage designated by
/// the list item \p E.
///
/// Three forms are handled:
///  - array shaping expression: the address is the value of the base pointer
///    and the size is sizeof(pointee) multiplied by every dimension;
///  - array section: the address is the lvalue address of \p E and the size is
///    the distance from it to one element past the section's upper element;
///  - anything else: the lvalue address of \p E and the size of its type.
///
/// \returns the pair (address, size); the address is always emitted before
/// the size.
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  // ([d0][d1]...)ptr
  if (const auto *Shaping = dyn_cast<OMPArrayShapingExpr>(E)) {
    llvm::Value *BasePtr = CGF.EmitScalarExpr(Shaping->getBase());
    llvm::Value *Bytes =
        CGF.getTypeSize(Shaping->getBase()->getType()->getPointeeType());
    for (const Expr *Dim : Shaping->getDimensions()) {
      // Each dimension is converted to size_t before it is multiplied in.
      llvm::Value *DimVal = CGF.EmitScalarConversion(
          CGF.EmitScalarExpr(Dim), Dim->getType(),
          CGF.getContext().getSizeType(), Dim->getExprLoc());
      Bytes = CGF.Builder.CreateNUWMul(Bytes, DimVal);
    }
    return std::make_pair(BasePtr, Bytes);
  }

  llvm::Value *BasePtr = CGF.EmitLValue(E).getPointer(CGF);

  const auto *Section = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts());
  if (!Section) {
    llvm::Value *Bytes = CGF.getTypeSize(E->getType());
    return std::make_pair(BasePtr, Bytes);
  }

  // Array section: size = (&upper_element + 1) - &lower_element.
  LValue UpperLVal =
      CGF.EmitOMPArraySectionExpr(Section, /*IsLowerBound=*/false);
  llvm::Value *OnePastUpper =
      CGF.Builder.CreateConstGEP1_32(UpperLVal.getPointer(CGF), /*Idx0=*/1);
  llvm::Value *LowerInt = CGF.Builder.CreatePtrToInt(BasePtr, CGF.SizeTy);
  llvm::Value *UpperInt = CGF.Builder.CreatePtrToInt(OnePastUpper, CGF.SizeTy);
  llvm::Value *Bytes = CGF.Builder.CreateNUWSub(UpperInt, LowerInt);
  return std::make_pair(BasePtr, Bytes);
}
/// Builds the implicit record type "kmp_task_affinity_info_t", if it is not
/// built yet, and stores it in \p KmpTaskAffinityInfoTy.
///
/// The record gets three fields, in this order: an intptr-sized integer, a
/// size_t and a 32-bit unsigned integer. When \p KmpTaskAffinityInfoTy is
/// already set, the function does nothing.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  // The record is built once and cached by the caller.
  if (!KmpTaskAffinityInfoTy.isNull())
    return;
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  RecordDecl *KmpAffinityInfoRD =
      C.buildImplicitRecord("kmp_task_affinity_info_t");
  KmpAffinityInfoRD->startDefinition();
  addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
  addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
  addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
  KmpAffinityInfoRD->completeDefinition();
  KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
}
CGOpenMPRuntime::TaskResultTy CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
const OMPExecutableDirective &D, const OMPExecutableDirective &D,
@ -4202,6 +4331,142 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
Evt->getExprLoc()); Evt->getExprLoc());
CGF.EmitStoreOfScalar(EvtVal, EvtLVal); CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
} }
// Process affinity clauses.
if (D.hasClausesOfKind<OMPAffinityClause>()) {
// Process list of affinity data.
ASTContext &C = CGM.getContext();
Address AffinitiesArray = Address::invalid();
// Calculate number of elements to form the array of affinity data.
llvm::Value *NumOfElements = nullptr;
unsigned NumAffinities = 0;
for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
if (const Expr *Modifier = C->getModifier()) {
const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
NumOfElements =
NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
}
} else {
NumAffinities += C->varlist_size();
}
}
getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
// Fields ids in kmp_task_affinity_info record.
enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
QualType KmpTaskAffinityInfoArrayTy;
if (NumOfElements) {
NumOfElements = CGF.Builder.CreateNUWAdd(
llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
OpaqueValueExpr OVE(
Loc,
C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
VK_RValue);
CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
RValue::get(NumOfElements));
KmpTaskAffinityInfoArrayTy =
C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
/*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
// Properly emit variable-sized array.
auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
ImplicitParamDecl::Other);
CGF.EmitVarDecl(*PD);
AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
/*isSigned=*/false);
} else {
KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
KmpTaskAffinityInfoTy,
llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
ArrayType::Normal, /*IndexTypeQuals=*/0);
AffinitiesArray =
CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
/*isSigned=*/false);
}
const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
// Fill array by elements without iterators.
unsigned Pos = 0;
bool HasIterator = false;
for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
if (C->getModifier()) {
HasIterator = true;
continue;
}
for (const Expr *E : C->varlists()) {
llvm::Value *Addr;
llvm::Value *Size;
std::tie(Addr, Size) = getPointerAndSize(CGF, E);
LValue Base =
CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
KmpTaskAffinityInfoTy);
// affs[i].base_addr = &<Affinities[i].second>;
LValue BaseAddrLVal = CGF.EmitLValueForField(
Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
BaseAddrLVal);
// affs[i].len = sizeof(<Affinities[i].second>);
LValue LenLVal = CGF.EmitLValueForField(
Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
CGF.EmitStoreOfScalar(Size, LenLVal);
++Pos;
}
}
LValue PosLVal;
if (HasIterator) {
PosLVal = CGF.MakeAddrLValue(
CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
C.getSizeType());
CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
}
// Process elements with iterators.
for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
const Expr *Modifier = C->getModifier();
if (!Modifier)
continue;
OMPIteratorGeneratorScope IteratorScope(
CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
for (const Expr *E : C->varlists()) {
llvm::Value *Addr;
llvm::Value *Size;
std::tie(Addr, Size) = getPointerAndSize(CGF, E);
llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
LValue Base = CGF.MakeAddrLValue(
Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
AffinitiesArray.getAlignment()),
KmpTaskAffinityInfoTy);
// affs[i].base_addr = &<Affinities[i].second>;
LValue BaseAddrLVal = CGF.EmitLValueForField(
Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
BaseAddrLVal);
// affs[i].len = sizeof(<Affinities[i].second>);
LValue LenLVal = CGF.EmitLValueForField(
Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
CGF.EmitStoreOfScalar(Size, LenLVal);
Idx = CGF.Builder.CreateNUWAdd(
Idx, llvm::ConstantInt::get(Idx->getType(), 1));
CGF.EmitStoreOfScalar(Idx, PosLVal);
}
}
// Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
// kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
// naffins, kmp_task_affinity_info_t *affin_list);
llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
llvm::Value *GTid = getThreadID(CGF, Loc);
llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
AffinitiesArray.getPointer(), CGM.VoidPtrTy);
// FIXME: Emit the call but ignore its result for now, until the
// runtime function is properly implemented.
(void)CGF.EmitRuntimeCall(
llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
{LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
}
llvm::Value *NewTaskNewTaskTTy = llvm::Value *NewTaskNewTaskTTy =
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
NewTask, KmpTaskTWithPrivatesPtrTy); NewTask, KmpTaskTWithPrivatesPtrTy);
@ -4350,85 +4615,6 @@ CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
return std::make_pair(NumDeps, Base); return std::make_pair(NumDeps, Base);
} }
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// The constructor emits, for every iterator of the expression, a counter
/// initialization, a condition block ("iter.cont") and the beginning of a
/// body block ("iter.body"), leaving the insertion point inside the innermost
/// body. The destructor emits the counter increments, the back branches and
/// the exit blocks ("iter.exit") in reverse iterator order. Code emitted while
/// an object of this class is alive therefore lands inside the generated loop
/// nest. If the iterator expression is null, nothing is emitted.
class OMPIteratorGeneratorScope final
: public CodeGenFunction::OMPPrivateScope {
CodeGenFunction &CGF;
const OMPIteratorExpr *E = nullptr;
/// Jump destinations of the "iter.cont" (condition) blocks, one per iterator.
SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
/// Jump destinations of the "iter.exit" blocks, one per iterator.
SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
OMPIteratorGeneratorScope() = delete;
OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
public:
/// Opens one loop per iterator of \p E (outermost first). Does nothing when
/// \p E is null.
OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
: CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
if (!E)
return;
// Upper bounds (iteration counts) are evaluated here, before Privatize() is
// called, and reused below as the loop limits.
SmallVector<llvm::Value *, 4> Uppers;
for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
// Register a temporary for the iterator variable itself ...
const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
addPrivate(VD, [&CGF, VD]() {
return CGF.CreateMemTemp(VD->getType(), VD->getName());
});
// ... and one for its helper counter variable.
const OMPIteratorHelperData &HelperData = E->getHelper(I);
addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
"counter.addr");
});
}
Privatize();
// Emit the loop headers, outermost iterator first.
for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
const OMPIteratorHelperData &HelperData = E->getHelper(I);
LValue CLVal =
CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
HelperData.CounterVD->getType());
// Counter = 0;
CGF.EmitStoreOfScalar(
llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
CLVal);
CodeGenFunction::JumpDest &ContDest =
ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
CodeGenFunction::JumpDest &ExitDest =
ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
// N = <number-of_iterations>;
llvm::Value *N = Uppers[I];
// cont:
// if (Counter < N) goto body; else goto exit;
CGF.EmitBlock(ContDest.getBlock());
auto *CVal =
CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
// The comparison signedness follows the type of the counter variable.
llvm::Value *Cmp =
HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
? CGF.Builder.CreateICmpSLT(CVal, N)
: CGF.Builder.CreateICmpULT(CVal, N);
llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
// body:
CGF.EmitBlock(BodyBB);
// Iteri = Begini + Counter * Stepi;
CGF.EmitIgnoredExpr(HelperData.Update);
}
}
/// Closes the loops opened by the constructor, innermost first. Does nothing
/// when the iterator expression is null.
~OMPIteratorGeneratorScope() {
if (!E)
return;
for (unsigned I = E->numOfIterators(); I > 0; --I) {
// Counter = Counter + 1;
const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
// goto cont;
CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
// exit:
// IsFinished is true only for the outermost loop's exit block (I == 1).
CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
}
}
};
} // namespace
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
llvm::PointerUnion<unsigned *, LValue *> Pos, llvm::PointerUnion<unsigned *, LValue *> Pos,
const OMPTaskDataTy::DependData &Data, const OMPTaskDataTy::DependData &Data,
@ -4446,37 +4632,9 @@ static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
: nullptr)); : nullptr));
for (const Expr *E : Data.DepExprs) { for (const Expr *E : Data.DepExprs) {
const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
llvm::Value *Addr; llvm::Value *Addr;
if (OASE) {
const Expr *Base = OASE->getBase();
Addr = CGF.EmitScalarExpr(Base);
} else {
Addr = CGF.EmitLValue(E).getPointer(CGF);
}
llvm::Value *Size; llvm::Value *Size;
QualType Ty = E->getType(); std::tie(Addr, Size) = getPointerAndSize(CGF, E);
if (OASE) {
Size = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
for (const Expr *SE : OASE->getDimensions()) {
llvm::Value *Sz = CGF.EmitScalarExpr(SE);
Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
CGF.getContext().getSizeType(),
SE->getExprLoc());
Size = CGF.Builder.CreateNUWMul(Size, Sz);
}
} else if (const auto *ASE =
dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
LValue UpAddrLVal =
CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGM.SizeTy);
llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
} else {
Size = CGF.getTypeSize(Ty);
}
LValue Base; LValue Base;
if (unsigned *P = Pos.dyn_cast<unsigned *>()) { if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
Base = CGF.MakeAddrLValue( Base = CGF.MakeAddrLValue(

View File

@ -458,6 +458,16 @@ private:
/// } flags; /// } flags;
/// } kmp_depend_info_t; /// } kmp_depend_info_t;
QualType KmpDependInfoTy; QualType KmpDependInfoTy;
/// Type typedef struct kmp_task_affinity_info {
/// kmp_intptr_t base_addr;
/// size_t len;
/// struct {
/// bool flag1 : 1;
/// bool flag2 : 1;
/// kmp_int32 reserved : 30;
/// } flags;
/// } kmp_task_affinity_info_t;
QualType KmpTaskAffinityInfoTy;
/// struct kmp_dim { // loop bounds info casted to kmp_int64 /// struct kmp_dim { // loop bounds info casted to kmp_int64
/// kmp_int64 lo; // lower /// kmp_int64 lo; // lower
/// kmp_int64 up; // upper /// kmp_int64 up; // upper

View File

@ -0,0 +1,132 @@
// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fopenmp-version=50 -x c++ -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
//
// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
// CHECK-LABEL: @main
int main() {
  // Codegen-only test body (checked with FileCheck, never executed).
  // Case 1: a single affinity item given as an array shaping expression, so
  //         the affinity array has a constant size of 1.
  // Case 2: an iterator-modified affinity clause plus a plain one, so the
  //         affinity array is variable-sized and filled partly in a loop.
  float *p;
  int a = 10;
  // kmp_task_affinity_info_t affs[1];
  // CHECK: [[AFFS_ADDR:%.+]] = alloca [1 x %struct.kmp_task_affinity_info_t],
  // CHECK: [[TD:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 [[GTID:%.+]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* @{{.+}} to i32 (i32, i8*)*))
  // CHECK: [[AFFINE_LST_ADDR:%.+]] = getelementptr inbounds [1 x %struct.kmp_task_affinity_info_t], [1 x %struct.kmp_task_affinity_info_t]* [[AFFS_ADDR]], i64 0, i64 0
  // CHECK: [[P:%.+]] = load float*, float** [[P_ADDR:%.+]],
  // CHECK: [[A_VAL:%.+]] = load i32, i32* [[A_ADDR:%.+]],
  // CHECK: [[A_SZ:%.+]] = sext i32 [[A_VAL]] to i64
  // CHECK: [[BYTES:%.+]] = mul nuw i64 4, [[A_SZ]]
  // CHECK: [[SZ:%.+]] = mul nuw i64 [[BYTES]], 10
  // CHECK: [[A_VAL:%.+]] = load i32, i32* [[A_ADDR]],
  // CHECK: [[A_SZ1:%.+]] = sext i32 [[A_VAL]] to i64
  // The last dimension is re-loaded, so the final multiply uses the second
  // sign-extended value, not the first one.
  // CHECK: [[SIZE:%.+]] = mul nuw i64 [[SZ]], [[A_SZ1]]
  // CHECK: [[AFFS_0_ADDR:%.+]] = getelementptr %struct.kmp_task_affinity_info_t, %struct.kmp_task_affinity_info_t* [[AFFINE_LST_ADDR]], i64 0
  // affs[0].base = p;
  // CHECK: [[AFFS_0_BASE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_task_affinity_info_t, %struct.kmp_task_affinity_info_t* [[AFFS_0_ADDR]], i32 0, i32 0
  // CHECK: [[P_INTPTR:%.+]] = ptrtoint float* [[P]] to i64
  // CHECK: store i64 [[P_INTPTR]], i64* [[AFFS_0_BASE_ADDR]],
  // affs[0].size = sizeof(*p) * a * 10 * a;
  // CHECK: [[AFFS_0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_task_affinity_info_t, %struct.kmp_task_affinity_info_t* [[AFFS_0_ADDR]], i32 0, i32 1
  // CHECK: store i64 [[SIZE]], i64* [[AFFS_0_SIZE_ADDR]],
  // CHECK: [[BC:%.+]] = bitcast %struct.kmp_task_affinity_info_t* [[AFFINE_LST_ADDR]] to i8*
  // CHECK: call i32 @__kmpc_omp_reg_task_with_affinity(%struct.ident_t* @{{.+}}, i32 [[GTID]], i8* [[TD]], i32 1, i8* [[BC]])
#pragma omp task affinity(([a][10][a])p)
  ;
  // CHECK: [[TD:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 [[GTID]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* @{{.+}} to i32 (i32, i8*)*))
  // CHECK: [[A_VAL:%.+]] = load i32, i32* [[A_ADDR]],
  // CHECK: [[SUB:%.+]] = sub nsw i32 [[A_VAL]], 0
  // CHECK: [[CONV:%.+]] = zext i32 [[SUB]] to i64
  // <num_elem> = <num_iters> + 1 constant affinity for affinity(a)
  // CHECK: [[NUM_ELEMS:%.+]] = add nuw i64 1, [[CONV]]
  // CHECK: [[SV:%.+]] = call i8* @llvm.stacksave()
  // CHECK: store i8* [[SV]], i8** [[SV_ADDR:%.+]],
  // kmp_task_affinity_info_t affs[<num_elem>];
  // CHECK: [[AFFS_ADDR:%.+]] = alloca %struct.kmp_task_affinity_info_t, i64 [[NUM_ELEMS]],
  // store i64 %21, i64* %__vla_expr0, align 8
  // CHECK: [[NAFFS:%.+]] = trunc i64 [[NUM_ELEMS]] to i32
  // CHECK: [[AFFS_0_ADDR:%.+]] = getelementptr %struct.kmp_task_affinity_info_t, %struct.kmp_task_affinity_info_t* [[AFFS_ADDR]], i64 0
  // affs[0].base = &a;
  // CHECK: [[AFFS_0_BASE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_task_affinity_info_t, %struct.kmp_task_affinity_info_t* [[AFFS_0_ADDR]], i32 0, i32 0
  // CHECK: [[A_INTPTR:%.+]] = ptrtoint i32* [[A_ADDR]] to i64
  // CHECK: store i64 [[A_INTPTR]], i64* [[AFFS_0_BASE_ADDR]],
  // affs[0].size = sizeof(a);
  // CHECK: [[AFFS_0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_task_affinity_info_t, %struct.kmp_task_affinity_info_t* [[AFFS_0_ADDR]], i32 0, i32 1
  // CHECK: store i64 4, i64* [[AFFS_0_SIZE_ADDR]],
  // affs_cnt = 1;
  // CHECK: store i64 1, i64* [[AFFS_CNT_ADDR:%.+]],
  // CHECK: [[A_VAL:%.+]] = load i32, i32* [[A_ADDR]],
  // CHECK: [[NITERS:%.+]] = sub nsw i32 [[A_VAL]], 0
  // CHECK: store i32 0, i32* [[CNT_ADDR:%.+]],
  // CHECK: br label %[[CONT:[^,]+]]
  //for (int cnt = 0; cnt < (a-0); ++cnt) {
  // int i = cnt + 0;
  // affs[affs_cnt].base = &p[i];
  // affs[affs_cnt].size = sizeof(p[i]);
  // ++affs_cnt;
  // }
  // CHECK: [[CONT]]:
  // CHECK: [[CNT:%.+]] = load i32, i32* [[CNT_ADDR]],
  // CHECK: [[CMP:%.+]] = icmp slt i32 [[CNT]], [[NITERS]]
  // CHECK: br i1 [[CMP]], label %[[BODY:[^,]+]], label %[[DONE:[^,]+]]
  // CHECK: [[BODY]]:
  // i = cnt + 0;
  // CHECK: [[CNT:%.+]] = load i32, i32* [[CNT_ADDR]],
  // CHECK: [[VAL:%.+]] = add nsw i32 0, [[CNT]]
  // CHECK: store i32 [[VAL]], i32* [[I_ADDR:%.+]],
  // &p[i]
  // CHECK: [[P:%.+]] = load float*, float** [[P_ADDR]],
  // CHECK: [[I:%.+]] = load i32, i32* [[I_ADDR]],
  // CHECK: [[IDX:%.+]] = sext i32 [[I]] to i64
  // CHECK: [[P_I_ADDR:%.+]] = getelementptr inbounds float, float* [[P]], i64 [[IDX]]
  // affs[affs_cnt]
  // CHECK: [[AFFS_CNT:%.+]] = load i64, i64* [[AFFS_CNT_ADDR]],
  // CHECK: [[AFFS_ELEM_ADDR:%.+]] = getelementptr %struct.kmp_task_affinity_info_t, %struct.kmp_task_affinity_info_t* [[AFFS_ADDR]], i64 [[AFFS_CNT]]
  // affs[affs_cnt].base = &p[i];
  // CHECK: [[AFFS_ELEM_BASE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_task_affinity_info_t, %struct.kmp_task_affinity_info_t* [[AFFS_ELEM_ADDR]], i32 0, i32 0
  // CHECK: [[CAST:%.+]] = ptrtoint float* [[P_I_ADDR]] to i64
  // CHECK: store i64 [[CAST]], i64* [[AFFS_ELEM_BASE_ADDR]],
  // affs[affs_cnt].size = sizeof(p[i]);
  // CHECK: [[AFFS_ELEM_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_task_affinity_info_t, %struct.kmp_task_affinity_info_t* [[AFFS_ELEM_ADDR]], i32 0, i32 1
  // CHECK: store i64 4, i64* [[AFFS_ELEM_SIZE_ADDR]],
  // ++affs_cnt;
  // CHECK: [[AFFS_CNT_NEXT:%.+]] = add nuw i64 [[AFFS_CNT]], 1
  // CHECK: store i64 [[AFFS_CNT_NEXT]], i64* [[AFFS_CNT_ADDR]],
  // ++cnt;
  // CHECK: [[CNT:%.+]] = load i32, i32* [[CNT_ADDR]],
  // CHECK: [[CNT_NEXT:%.+]] = add nsw i32 [[CNT]], 1
  // CHECK: store i32 [[CNT_NEXT]], i32* [[CNT_ADDR]],
  // CHECK: br label %[[CONT]]
  // CHECK: [[DONE]]:
  // CHECK: [[BC:%.+]] = bitcast %struct.kmp_task_affinity_info_t* [[AFFS_ADDR]] to i8*
  // CHECK: call i32 @__kmpc_omp_reg_task_with_affinity(%struct.ident_t* @{{.+}}, i32 [[GTID]], i8* [[TD]], i32 [[NAFFS]], i8* [[BC]])
  // CHECK: [[SV:%.+]] = load i8*, i8** [[SV_ADDR]],
  // CHECK: call void @llvm.stackrestore(i8* [[SV]])
#pragma omp task affinity(iterator(i=0:a): p[i]) affinity(a)
  ;
  return 0;
}
#endif

View File

@ -336,6 +336,8 @@ __OMP_RTL(__kmpc_push_num_threads, false, Void, IdentPtr, Int32,
__OMP_RTL(__kmpc_push_proc_bind, false, Void, IdentPtr, Int32, /* Int */ Int32) __OMP_RTL(__kmpc_push_proc_bind, false, Void, IdentPtr, Int32, /* Int */ Int32)
__OMP_RTL(__kmpc_serialized_parallel, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_serialized_parallel, false, Void, IdentPtr, Int32)
__OMP_RTL(__kmpc_end_serialized_parallel, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_end_serialized_parallel, false, Void, IdentPtr, Int32)
__OMP_RTL(__kmpc_omp_reg_task_with_affinity, false, Int32, IdentPtr, Int32,
Int8Ptr, Int32, Int8Ptr)
__OMP_RTL(omp_get_thread_num, false, Int32, ) __OMP_RTL(omp_get_thread_num, false, Int32, )
__OMP_RTL(omp_get_num_threads, false, Int32, ) __OMP_RTL(omp_get_num_threads, false, Int32, )