forked from OSchip/llvm-project
[OPENMP] Add codegen for `depend` clauses on `target` directive.
Added basic support for codegen of `depend` clauses on `target` directive. llvm-svn: 322501
This commit is contained in:
parent
62875fcd6c
commit
8451efad89
|
@ -3192,14 +3192,17 @@ public:
|
||||||
/// \brief Build 'device' clause.
|
/// \brief Build 'device' clause.
|
||||||
///
|
///
|
||||||
/// \param E Expression associated with this clause.
|
/// \param E Expression associated with this clause.
|
||||||
|
/// \param CaptureRegion Innermost OpenMP region where expressions in this
|
||||||
|
/// clause must be captured.
|
||||||
/// \param StartLoc Starting location of the clause.
|
/// \param StartLoc Starting location of the clause.
|
||||||
/// \param LParenLoc Location of '('.
|
/// \param LParenLoc Location of '('.
|
||||||
/// \param EndLoc Ending location of the clause.
|
/// \param EndLoc Ending location of the clause.
|
||||||
OMPDeviceClause(Expr *E, Stmt *HelperE, SourceLocation StartLoc,
|
OMPDeviceClause(Expr *E, Stmt *HelperE, OpenMPDirectiveKind CaptureRegion,
|
||||||
SourceLocation LParenLoc, SourceLocation EndLoc)
|
SourceLocation StartLoc, SourceLocation LParenLoc,
|
||||||
|
SourceLocation EndLoc)
|
||||||
: OMPClause(OMPC_device, StartLoc, EndLoc), OMPClauseWithPreInit(this),
|
: OMPClause(OMPC_device, StartLoc, EndLoc), OMPClauseWithPreInit(this),
|
||||||
LParenLoc(LParenLoc), Device(E) {
|
LParenLoc(LParenLoc), Device(E) {
|
||||||
setPreInitStmt(HelperE);
|
setPreInitStmt(HelperE, CaptureRegion);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \brief Build an empty clause.
|
/// \brief Build an empty clause.
|
||||||
|
|
|
@ -891,6 +891,7 @@ void clang::getOpenMPCaptureRegions(
|
||||||
case OMPD_target_teams:
|
case OMPD_target_teams:
|
||||||
case OMPD_target_teams_distribute:
|
case OMPD_target_teams_distribute:
|
||||||
case OMPD_target_teams_distribute_simd:
|
case OMPD_target_teams_distribute_simd:
|
||||||
|
CaptureRegions.push_back(OMPD_task);
|
||||||
CaptureRegions.push_back(OMPD_target);
|
CaptureRegions.push_back(OMPD_target);
|
||||||
CaptureRegions.push_back(OMPD_teams);
|
CaptureRegions.push_back(OMPD_teams);
|
||||||
break;
|
break;
|
||||||
|
@ -901,6 +902,7 @@ void clang::getOpenMPCaptureRegions(
|
||||||
break;
|
break;
|
||||||
case OMPD_target:
|
case OMPD_target:
|
||||||
case OMPD_target_simd:
|
case OMPD_target_simd:
|
||||||
|
CaptureRegions.push_back(OMPD_task);
|
||||||
CaptureRegions.push_back(OMPD_target);
|
CaptureRegions.push_back(OMPD_target);
|
||||||
break;
|
break;
|
||||||
case OMPD_teams_distribute_parallel_for:
|
case OMPD_teams_distribute_parallel_for:
|
||||||
|
@ -911,6 +913,7 @@ void clang::getOpenMPCaptureRegions(
|
||||||
case OMPD_target_parallel:
|
case OMPD_target_parallel:
|
||||||
case OMPD_target_parallel_for:
|
case OMPD_target_parallel_for:
|
||||||
case OMPD_target_parallel_for_simd:
|
case OMPD_target_parallel_for_simd:
|
||||||
|
CaptureRegions.push_back(OMPD_task);
|
||||||
CaptureRegions.push_back(OMPD_target);
|
CaptureRegions.push_back(OMPD_target);
|
||||||
CaptureRegions.push_back(OMPD_parallel);
|
CaptureRegions.push_back(OMPD_parallel);
|
||||||
break;
|
break;
|
||||||
|
@ -925,6 +928,7 @@ void clang::getOpenMPCaptureRegions(
|
||||||
CaptureRegions.push_back(OMPD_taskloop);
|
CaptureRegions.push_back(OMPD_taskloop);
|
||||||
break;
|
break;
|
||||||
case OMPD_target_teams_distribute_parallel_for:
|
case OMPD_target_teams_distribute_parallel_for:
|
||||||
|
CaptureRegions.push_back(OMPD_task);
|
||||||
CaptureRegions.push_back(OMPD_target);
|
CaptureRegions.push_back(OMPD_target);
|
||||||
CaptureRegions.push_back(OMPD_teams);
|
CaptureRegions.push_back(OMPD_teams);
|
||||||
CaptureRegions.push_back(OMPD_parallel);
|
CaptureRegions.push_back(OMPD_parallel);
|
||||||
|
|
|
@ -4187,6 +4187,11 @@ static void emitPrivatesInit(CodeGenFunction &CGF,
|
||||||
auto &C = CGF.getContext();
|
auto &C = CGF.getContext();
|
||||||
auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
|
auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
|
||||||
LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
|
LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
|
||||||
|
OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
|
||||||
|
? OMPD_taskloop
|
||||||
|
: OMPD_task;
|
||||||
|
const CapturedStmt &CS = *D.getCapturedStmt(Kind);
|
||||||
|
CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
|
||||||
LValue SrcBase;
|
LValue SrcBase;
|
||||||
bool IsTargetTask =
|
bool IsTargetTask =
|
||||||
isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
|
isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
|
||||||
|
@ -4195,16 +4200,12 @@ static void emitPrivatesInit(CodeGenFunction &CGF,
|
||||||
// PointersArray and SizesArray. The original variables for these arrays are
|
// PointersArray and SizesArray. The original variables for these arrays are
|
||||||
// not captured and we get their addresses explicitly.
|
// not captured and we get their addresses explicitly.
|
||||||
if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
|
if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
|
||||||
(IsTargetTask && Data.FirstprivateVars.size() > 3)) {
|
(IsTargetTask && KmpTaskSharedsPtr.isValid())) {
|
||||||
SrcBase = CGF.MakeAddrLValue(
|
SrcBase = CGF.MakeAddrLValue(
|
||||||
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
||||||
KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
|
KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
|
||||||
SharedsTy);
|
SharedsTy);
|
||||||
}
|
}
|
||||||
OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
|
|
||||||
? OMPD_taskloop
|
|
||||||
: OMPD_task;
|
|
||||||
CodeGenFunction::CGCapturedStmtInfo CapturesInfo(*D.getCapturedStmt(Kind));
|
|
||||||
FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
|
FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
|
||||||
for (auto &&Pair : Privates) {
|
for (auto &&Pair : Privates) {
|
||||||
auto *VD = Pair.second.PrivateCopy;
|
auto *VD = Pair.second.PrivateCopy;
|
||||||
|
@ -4218,17 +4219,19 @@ static void emitPrivatesInit(CodeGenFunction &CGF,
|
||||||
// PointersArray or SizesArray.
|
// PointersArray or SizesArray.
|
||||||
LValue SharedRefLValue;
|
LValue SharedRefLValue;
|
||||||
QualType Type = OriginalVD->getType();
|
QualType Type = OriginalVD->getType();
|
||||||
if (IsTargetTask && isa<ImplicitParamDecl>(OriginalVD) &&
|
auto *SharedField = CapturesInfo.lookup(OriginalVD);
|
||||||
isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
|
if (IsTargetTask && !SharedField) {
|
||||||
cast<CapturedDecl>(OriginalVD->getDeclContext())->getNumParams() ==
|
assert(isa<ImplicitParamDecl>(OriginalVD) &&
|
||||||
0 &&
|
isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
|
||||||
isa<TranslationUnitDecl>(
|
cast<CapturedDecl>(OriginalVD->getDeclContext())
|
||||||
cast<CapturedDecl>(OriginalVD->getDeclContext())
|
->getNumParams() == 0 &&
|
||||||
->getDeclContext())) {
|
isa<TranslationUnitDecl>(
|
||||||
|
cast<CapturedDecl>(OriginalVD->getDeclContext())
|
||||||
|
->getDeclContext()) &&
|
||||||
|
"Expected artificial target data variable.");
|
||||||
SharedRefLValue =
|
SharedRefLValue =
|
||||||
CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
|
CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
|
||||||
} else {
|
} else {
|
||||||
auto *SharedField = CapturesInfo.lookup(OriginalVD);
|
|
||||||
SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
|
SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
|
||||||
SharedRefLValue = CGF.MakeAddrLValue(
|
SharedRefLValue = CGF.MakeAddrLValue(
|
||||||
Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
|
Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
|
||||||
|
@ -7040,86 +7043,27 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
|
||||||
const OMPExecutableDirective &D,
|
const OMPExecutableDirective &D,
|
||||||
llvm::Value *OutlinedFn,
|
llvm::Value *OutlinedFn,
|
||||||
llvm::Value *OutlinedFnID,
|
llvm::Value *OutlinedFnID,
|
||||||
const Expr *IfCond, const Expr *Device,
|
const Expr *IfCond, const Expr *Device) {
|
||||||
ArrayRef<llvm::Value *> CapturedVars) {
|
|
||||||
if (!CGF.HaveInsertPoint())
|
if (!CGF.HaveInsertPoint())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
assert(OutlinedFn && "Invalid outlined function!");
|
assert(OutlinedFn && "Invalid outlined function!");
|
||||||
|
|
||||||
// Fill up the arrays with all the captured variables.
|
const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
|
||||||
MappableExprsHandler::MapValuesArrayTy KernelArgs;
|
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
|
||||||
MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
|
|
||||||
MappableExprsHandler::MapValuesArrayTy Pointers;
|
|
||||||
MappableExprsHandler::MapValuesArrayTy Sizes;
|
|
||||||
MappableExprsHandler::MapFlagsArrayTy MapTypes;
|
|
||||||
|
|
||||||
MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
|
|
||||||
MappableExprsHandler::MapValuesArrayTy CurPointers;
|
|
||||||
MappableExprsHandler::MapValuesArrayTy CurSizes;
|
|
||||||
MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
|
|
||||||
|
|
||||||
// Get mappable expression information.
|
|
||||||
MappableExprsHandler MEHandler(D, CGF);
|
|
||||||
|
|
||||||
const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
|
const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
|
||||||
auto RI = CS.getCapturedRecordDecl()->field_begin();
|
auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
|
||||||
auto CV = CapturedVars.begin();
|
PrePostActionTy &) {
|
||||||
for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
|
CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
|
||||||
CE = CS.capture_end();
|
};
|
||||||
CI != CE; ++CI, ++RI, ++CV) {
|
emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
|
||||||
CurBasePointers.clear();
|
|
||||||
CurPointers.clear();
|
|
||||||
CurSizes.clear();
|
|
||||||
CurMapTypes.clear();
|
|
||||||
|
|
||||||
// VLA sizes are passed to the outlined region by copy and do not have map
|
|
||||||
// information associated.
|
|
||||||
if (CI->capturesVariableArrayType()) {
|
|
||||||
CurBasePointers.push_back(*CV);
|
|
||||||
CurPointers.push_back(*CV);
|
|
||||||
CurSizes.push_back(CGF.getTypeSize(RI->getType()));
|
|
||||||
// Copy to the device as an argument. No need to retrieve it.
|
|
||||||
CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
|
|
||||||
MappableExprsHandler::OMP_MAP_TARGET_PARAM);
|
|
||||||
} else {
|
|
||||||
// If we have any information in the map clause, we use it, otherwise we
|
|
||||||
// just do a default mapping.
|
|
||||||
MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
|
|
||||||
CurSizes, CurMapTypes);
|
|
||||||
if (CurBasePointers.empty())
|
|
||||||
MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
|
|
||||||
CurPointers, CurSizes, CurMapTypes);
|
|
||||||
}
|
|
||||||
// We expect to have at least an element of information for this capture.
|
|
||||||
assert(!CurBasePointers.empty() && "Non-existing map pointer for capture!");
|
|
||||||
assert(CurBasePointers.size() == CurPointers.size() &&
|
|
||||||
CurBasePointers.size() == CurSizes.size() &&
|
|
||||||
CurBasePointers.size() == CurMapTypes.size() &&
|
|
||||||
"Inconsistent map information sizes!");
|
|
||||||
|
|
||||||
// The kernel args are always the first elements of the base pointers
|
|
||||||
// associated with a capture.
|
|
||||||
KernelArgs.push_back(*CurBasePointers.front());
|
|
||||||
// We need to append the results of this capture to what we already have.
|
|
||||||
BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
|
|
||||||
Pointers.append(CurPointers.begin(), CurPointers.end());
|
|
||||||
Sizes.append(CurSizes.begin(), CurSizes.end());
|
|
||||||
MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
|
|
||||||
}
|
|
||||||
|
|
||||||
|
CodeGenFunction::OMPTargetDataInfo InputInfo;
|
||||||
|
llvm::Value *MapTypesArray = nullptr;
|
||||||
// Fill up the pointer arrays and transfer execution to the device.
|
// Fill up the pointer arrays and transfer execution to the device.
|
||||||
auto &&ThenGen = [this, &BasePointers, &Pointers, &Sizes, &MapTypes, Device,
|
auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
|
||||||
OutlinedFn, OutlinedFnID, &D,
|
&MapTypesArray, &CS, RequiresOuterTask,
|
||||||
&KernelArgs](CodeGenFunction &CGF, PrePostActionTy &) {
|
&CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) {
|
||||||
auto &RT = CGF.CGM.getOpenMPRuntime();
|
|
||||||
// Emit the offloading arrays.
|
|
||||||
TargetDataInfo Info;
|
|
||||||
emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
|
|
||||||
emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
|
|
||||||
Info.PointersArray, Info.SizesArray,
|
|
||||||
Info.MapTypesArray, Info);
|
|
||||||
|
|
||||||
// On top of the arrays that were filled up, the target offloading call
|
// On top of the arrays that were filled up, the target offloading call
|
||||||
// takes as arguments the device id as well as the host pointer. The host
|
// takes as arguments the device id as well as the host pointer. The host
|
||||||
// pointer is used by the runtime library to identify the current target
|
// pointer is used by the runtime library to identify the current target
|
||||||
|
@ -7142,13 +7086,14 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Emit the number of elements in the offloading arrays.
|
// Emit the number of elements in the offloading arrays.
|
||||||
llvm::Value *PointerNum = CGF.Builder.getInt32(BasePointers.size());
|
llvm::Value *PointerNum =
|
||||||
|
CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
|
||||||
|
|
||||||
// Return value of the runtime offloading call.
|
// Return value of the runtime offloading call.
|
||||||
llvm::Value *Return;
|
llvm::Value *Return;
|
||||||
|
|
||||||
auto *NumTeams = emitNumTeamsForTargetDirective(RT, CGF, D);
|
auto *NumTeams = emitNumTeamsForTargetDirective(*this, CGF, D);
|
||||||
auto *NumThreads = emitNumThreadsForTargetDirective(RT, CGF, D);
|
auto *NumThreads = emitNumThreadsForTargetDirective(*this, CGF, D);
|
||||||
|
|
||||||
bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
|
bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
|
||||||
// The target region is an outlined function launched by the runtime
|
// The target region is an outlined function launched by the runtime
|
||||||
|
@ -7186,25 +7131,30 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
|
||||||
// passed to the runtime library - a 32-bit integer with the value zero.
|
// passed to the runtime library - a 32-bit integer with the value zero.
|
||||||
assert(NumThreads && "Thread limit expression should be available along "
|
assert(NumThreads && "Thread limit expression should be available along "
|
||||||
"with number of teams.");
|
"with number of teams.");
|
||||||
llvm::Value *OffloadingArgs[] = {
|
llvm::Value *OffloadingArgs[] = {DeviceID,
|
||||||
DeviceID, OutlinedFnID,
|
OutlinedFnID,
|
||||||
PointerNum, Info.BasePointersArray,
|
PointerNum,
|
||||||
Info.PointersArray, Info.SizesArray,
|
InputInfo.BasePointersArray.getPointer(),
|
||||||
Info.MapTypesArray, NumTeams,
|
InputInfo.PointersArray.getPointer(),
|
||||||
NumThreads};
|
InputInfo.SizesArray.getPointer(),
|
||||||
|
MapTypesArray,
|
||||||
|
NumTeams,
|
||||||
|
NumThreads};
|
||||||
Return = CGF.EmitRuntimeCall(
|
Return = CGF.EmitRuntimeCall(
|
||||||
RT.createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
|
createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
|
||||||
: OMPRTL__tgt_target_teams),
|
: OMPRTL__tgt_target_teams),
|
||||||
OffloadingArgs);
|
OffloadingArgs);
|
||||||
} else {
|
} else {
|
||||||
llvm::Value *OffloadingArgs[] = {
|
llvm::Value *OffloadingArgs[] = {DeviceID,
|
||||||
DeviceID, OutlinedFnID,
|
OutlinedFnID,
|
||||||
PointerNum, Info.BasePointersArray,
|
PointerNum,
|
||||||
Info.PointersArray, Info.SizesArray,
|
InputInfo.BasePointersArray.getPointer(),
|
||||||
Info.MapTypesArray};
|
InputInfo.PointersArray.getPointer(),
|
||||||
|
InputInfo.SizesArray.getPointer(),
|
||||||
|
MapTypesArray};
|
||||||
Return = CGF.EmitRuntimeCall(
|
Return = CGF.EmitRuntimeCall(
|
||||||
RT.createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
|
createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
|
||||||
: OMPRTL__tgt_target),
|
: OMPRTL__tgt_target),
|
||||||
OffloadingArgs);
|
OffloadingArgs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -7217,17 +7167,114 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
|
||||||
CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
|
CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
|
||||||
|
|
||||||
CGF.EmitBlock(OffloadFailedBlock);
|
CGF.EmitBlock(OffloadFailedBlock);
|
||||||
emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, KernelArgs);
|
if (RequiresOuterTask) {
|
||||||
|
CapturedVars.clear();
|
||||||
|
CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
|
||||||
|
}
|
||||||
|
emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, CapturedVars);
|
||||||
CGF.EmitBranch(OffloadContBlock);
|
CGF.EmitBranch(OffloadContBlock);
|
||||||
|
|
||||||
CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
|
CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
|
||||||
};
|
};
|
||||||
|
|
||||||
// Notify that the host version must be executed.
|
// Notify that the host version must be executed.
|
||||||
auto &&ElseGen = [this, &D, OutlinedFn, &KernelArgs](CodeGenFunction &CGF,
|
auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
|
||||||
PrePostActionTy &) {
|
RequiresOuterTask](CodeGenFunction &CGF,
|
||||||
emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn,
|
PrePostActionTy &) {
|
||||||
KernelArgs);
|
if (RequiresOuterTask) {
|
||||||
|
CapturedVars.clear();
|
||||||
|
CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
|
||||||
|
}
|
||||||
|
emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, CapturedVars);
|
||||||
|
};
|
||||||
|
|
||||||
|
auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
|
||||||
|
&CapturedVars, RequiresOuterTask,
|
||||||
|
&CS](CodeGenFunction &CGF, PrePostActionTy &) {
|
||||||
|
// Fill up the arrays with all the captured variables.
|
||||||
|
MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
|
||||||
|
MappableExprsHandler::MapValuesArrayTy Pointers;
|
||||||
|
MappableExprsHandler::MapValuesArrayTy Sizes;
|
||||||
|
MappableExprsHandler::MapFlagsArrayTy MapTypes;
|
||||||
|
|
||||||
|
MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
|
||||||
|
MappableExprsHandler::MapValuesArrayTy CurPointers;
|
||||||
|
MappableExprsHandler::MapValuesArrayTy CurSizes;
|
||||||
|
MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
|
||||||
|
|
||||||
|
// Get mappable expression information.
|
||||||
|
MappableExprsHandler MEHandler(D, CGF);
|
||||||
|
|
||||||
|
auto RI = CS.getCapturedRecordDecl()->field_begin();
|
||||||
|
auto CV = CapturedVars.begin();
|
||||||
|
for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
|
||||||
|
CE = CS.capture_end();
|
||||||
|
CI != CE; ++CI, ++RI, ++CV) {
|
||||||
|
CurBasePointers.clear();
|
||||||
|
CurPointers.clear();
|
||||||
|
CurSizes.clear();
|
||||||
|
CurMapTypes.clear();
|
||||||
|
|
||||||
|
// VLA sizes are passed to the outlined region by copy and do not have map
|
||||||
|
// information associated.
|
||||||
|
if (CI->capturesVariableArrayType()) {
|
||||||
|
CurBasePointers.push_back(*CV);
|
||||||
|
CurPointers.push_back(*CV);
|
||||||
|
CurSizes.push_back(CGF.getTypeSize(RI->getType()));
|
||||||
|
// Copy to the device as an argument. No need to retrieve it.
|
||||||
|
CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
|
||||||
|
MappableExprsHandler::OMP_MAP_TARGET_PARAM);
|
||||||
|
} else {
|
||||||
|
// If we have any information in the map clause, we use it, otherwise we
|
||||||
|
// just do a default mapping.
|
||||||
|
MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
|
||||||
|
CurSizes, CurMapTypes);
|
||||||
|
if (CurBasePointers.empty())
|
||||||
|
MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
|
||||||
|
CurPointers, CurSizes, CurMapTypes);
|
||||||
|
}
|
||||||
|
// We expect to have at least an element of information for this capture.
|
||||||
|
assert(!CurBasePointers.empty() &&
|
||||||
|
"Non-existing map pointer for capture!");
|
||||||
|
assert(CurBasePointers.size() == CurPointers.size() &&
|
||||||
|
CurBasePointers.size() == CurSizes.size() &&
|
||||||
|
CurBasePointers.size() == CurMapTypes.size() &&
|
||||||
|
"Inconsistent map information sizes!");
|
||||||
|
|
||||||
|
// We need to append the results of this capture to what we already have.
|
||||||
|
BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
|
||||||
|
Pointers.append(CurPointers.begin(), CurPointers.end());
|
||||||
|
Sizes.append(CurSizes.begin(), CurSizes.end());
|
||||||
|
MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
|
||||||
|
}
|
||||||
|
|
||||||
|
TargetDataInfo Info;
|
||||||
|
// Fill up the arrays and create the arguments.
|
||||||
|
emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
|
||||||
|
emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
|
||||||
|
Info.PointersArray, Info.SizesArray,
|
||||||
|
Info.MapTypesArray, Info);
|
||||||
|
InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
|
||||||
|
InputInfo.BasePointersArray =
|
||||||
|
Address(Info.BasePointersArray, CGM.getPointerAlign());
|
||||||
|
InputInfo.PointersArray =
|
||||||
|
Address(Info.PointersArray, CGM.getPointerAlign());
|
||||||
|
InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
|
||||||
|
MapTypesArray = Info.MapTypesArray;
|
||||||
|
if (RequiresOuterTask)
|
||||||
|
CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
|
||||||
|
else
|
||||||
|
emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
|
||||||
|
};
|
||||||
|
|
||||||
|
auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
|
||||||
|
CodeGenFunction &CGF, PrePostActionTy &) {
|
||||||
|
if (RequiresOuterTask) {
|
||||||
|
CodeGenFunction::OMPTargetDataInfo InputInfo;
|
||||||
|
CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
|
||||||
|
} else {
|
||||||
|
emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// If we have a target function ID it means that we need to support
|
// If we have a target function ID it means that we need to support
|
||||||
|
@ -7235,14 +7282,14 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
|
||||||
// regardless of the conditional in the if clause if, e.g., the user do not
|
// regardless of the conditional in the if clause if, e.g., the user do not
|
||||||
// specify target triples.
|
// specify target triples.
|
||||||
if (OutlinedFnID) {
|
if (OutlinedFnID) {
|
||||||
if (IfCond)
|
if (IfCond) {
|
||||||
emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
|
emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
|
||||||
else {
|
} else {
|
||||||
RegionCodeGenTy ThenRCG(ThenGen);
|
RegionCodeGenTy ThenRCG(TargetThenGen);
|
||||||
ThenRCG(CGF);
|
ThenRCG(CGF);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
RegionCodeGenTy ElseRCG(ElseGen);
|
RegionCodeGenTy ElseRCG(TargetElseGen);
|
||||||
ElseRCG(CGF);
|
ElseRCG(CGF);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -8260,8 +8307,7 @@ void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF,
|
||||||
const OMPExecutableDirective &D,
|
const OMPExecutableDirective &D,
|
||||||
llvm::Value *OutlinedFn,
|
llvm::Value *OutlinedFn,
|
||||||
llvm::Value *OutlinedFnID,
|
llvm::Value *OutlinedFnID,
|
||||||
const Expr *IfCond, const Expr *Device,
|
const Expr *IfCond, const Expr *Device) {
|
||||||
ArrayRef<llvm::Value *> CapturedVars) {
|
|
||||||
llvm_unreachable("Not supported in SIMD-only mode");
|
llvm_unreachable("Not supported in SIMD-only mode");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1207,13 +1207,11 @@ public:
|
||||||
/// directive, or null if no if clause is used.
|
/// directive, or null if no if clause is used.
|
||||||
/// \param Device Expression evaluated in device clause associated with the
|
/// \param Device Expression evaluated in device clause associated with the
|
||||||
/// target directive, or null if no device clause is used.
|
/// target directive, or null if no device clause is used.
|
||||||
/// \param CapturedVars Values captured in the current region.
|
|
||||||
virtual void emitTargetCall(CodeGenFunction &CGF,
|
virtual void emitTargetCall(CodeGenFunction &CGF,
|
||||||
const OMPExecutableDirective &D,
|
const OMPExecutableDirective &D,
|
||||||
llvm::Value *OutlinedFn,
|
llvm::Value *OutlinedFn,
|
||||||
llvm::Value *OutlinedFnID, const Expr *IfCond,
|
llvm::Value *OutlinedFnID, const Expr *IfCond,
|
||||||
const Expr *Device,
|
const Expr *Device);
|
||||||
ArrayRef<llvm::Value *> CapturedVars);
|
|
||||||
|
|
||||||
/// \brief Emit the target regions enclosed in \a GD function definition or
|
/// \brief Emit the target regions enclosed in \a GD function definition or
|
||||||
/// the function itself in case it is a valid device function. Returns true if
|
/// the function itself in case it is a valid device function. Returns true if
|
||||||
|
@ -1833,11 +1831,9 @@ public:
|
||||||
/// directive, or null if no if clause is used.
|
/// directive, or null if no if clause is used.
|
||||||
/// \param Device Expression evaluated in device clause associated with the
|
/// \param Device Expression evaluated in device clause associated with the
|
||||||
/// target directive, or null if no device clause is used.
|
/// target directive, or null if no device clause is used.
|
||||||
/// \param CapturedVars Values captured in the current region.
|
|
||||||
void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D,
|
void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D,
|
||||||
llvm::Value *OutlinedFn, llvm::Value *OutlinedFnID,
|
llvm::Value *OutlinedFn, llvm::Value *OutlinedFnID,
|
||||||
const Expr *IfCond, const Expr *Device,
|
const Expr *IfCond, const Expr *Device) override;
|
||||||
ArrayRef<llvm::Value *> CapturedVars) override;
|
|
||||||
|
|
||||||
/// \brief Emit the target regions enclosed in \a GD function definition or
|
/// \brief Emit the target regions enclosed in \a GD function definition or
|
||||||
/// the function itself in case it is a valid device function. Returns true if
|
/// the function itself in case it is a valid device function. Returns true if
|
||||||
|
|
|
@ -3091,12 +3091,14 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
|
||||||
}
|
}
|
||||||
// Privatize all private variables except for in_reduction items.
|
// Privatize all private variables except for in_reduction items.
|
||||||
(void)Scope.Privatize();
|
(void)Scope.Privatize();
|
||||||
InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
|
if (InputInfo.NumberOfTargetItems > 0) {
|
||||||
CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0, CGF.getPointerSize());
|
InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
|
||||||
InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
|
CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0, CGF.getPointerSize());
|
||||||
CGF.GetAddrOfLocalVar(PVD), /*Index=*/0, CGF.getPointerSize());
|
InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
|
||||||
InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
|
CGF.GetAddrOfLocalVar(PVD), /*Index=*/0, CGF.getPointerSize());
|
||||||
CGF.GetAddrOfLocalVar(SVD), /*Index=*/0, CGF.getSizeSize());
|
InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
|
||||||
|
CGF.GetAddrOfLocalVar(SVD), /*Index=*/0, CGF.getSizeSize());
|
||||||
|
}
|
||||||
|
|
||||||
Action.Enter(CGF);
|
Action.Enter(CGF);
|
||||||
OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
|
OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
|
||||||
|
@ -3910,7 +3912,6 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
|
||||||
const RegionCodeGenTy &CodeGen) {
|
const RegionCodeGenTy &CodeGen) {
|
||||||
assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
|
assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
|
||||||
CodeGenModule &CGM = CGF.CGM;
|
CodeGenModule &CGM = CGF.CGM;
|
||||||
const CapturedStmt &CS = *S.getCapturedStmt(OMPD_target);
|
|
||||||
|
|
||||||
llvm::Function *Fn = nullptr;
|
llvm::Function *Fn = nullptr;
|
||||||
llvm::Constant *FnID = nullptr;
|
llvm::Constant *FnID = nullptr;
|
||||||
|
@ -3958,11 +3959,8 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
|
||||||
// Emit target region as a standalone region.
|
// Emit target region as a standalone region.
|
||||||
CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
|
CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
|
||||||
IsOffloadEntry, CodeGen);
|
IsOffloadEntry, CodeGen);
|
||||||
OMPLexicalScope Scope(CGF, S);
|
OMPLexicalScope Scope(CGF, S, OMPD_task);
|
||||||
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
|
CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device);
|
||||||
CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
|
|
||||||
CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
|
|
||||||
CapturedVars);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
|
static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
|
||||||
|
|
|
@ -2121,13 +2121,28 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
|
||||||
case OMPD_target_parallel_for_simd:
|
case OMPD_target_parallel_for_simd:
|
||||||
case OMPD_target_teams_distribute:
|
case OMPD_target_teams_distribute:
|
||||||
case OMPD_target_teams_distribute_simd: {
|
case OMPD_target_teams_distribute_simd: {
|
||||||
|
QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1);
|
||||||
|
QualType Args[] = {Context.VoidPtrTy.withConst().withRestrict()};
|
||||||
|
FunctionProtoType::ExtProtoInfo EPI;
|
||||||
|
EPI.Variadic = true;
|
||||||
|
QualType CopyFnType = Context.getFunctionType(Context.VoidTy, Args, EPI);
|
||||||
|
Sema::CapturedParamNameType Params[] = {
|
||||||
|
std::make_pair(".global_tid.", KmpInt32Ty),
|
||||||
|
std::make_pair(".part_id.", Context.getPointerType(KmpInt32Ty)),
|
||||||
|
std::make_pair(".privates.", Context.VoidPtrTy.withConst()),
|
||||||
|
std::make_pair(".copy_fn.",
|
||||||
|
Context.getPointerType(CopyFnType).withConst()),
|
||||||
|
std::make_pair(".task_t.", Context.VoidPtrTy.withConst()),
|
||||||
|
std::make_pair(StringRef(), QualType()) // __context with shared vars
|
||||||
|
};
|
||||||
|
ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
|
||||||
|
Params);
|
||||||
Sema::CapturedParamNameType ParamsTarget[] = {
|
Sema::CapturedParamNameType ParamsTarget[] = {
|
||||||
std::make_pair(StringRef(), QualType()) // __context with shared vars
|
std::make_pair(StringRef(), QualType()) // __context with shared vars
|
||||||
};
|
};
|
||||||
// Start a captured region for 'target' with no implicit parameters.
|
// Start a captured region for 'target' with no implicit parameters.
|
||||||
ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
|
ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
|
||||||
ParamsTarget);
|
ParamsTarget);
|
||||||
QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1);
|
|
||||||
QualType KmpInt32PtrTy =
|
QualType KmpInt32PtrTy =
|
||||||
Context.getPointerType(KmpInt32Ty).withConst().withRestrict();
|
Context.getPointerType(KmpInt32Ty).withConst().withRestrict();
|
||||||
Sema::CapturedParamNameType ParamsTeamsOrParallel[] = {
|
Sema::CapturedParamNameType ParamsTeamsOrParallel[] = {
|
||||||
|
@ -2141,6 +2156,33 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
|
||||||
ParamsTeamsOrParallel);
|
ParamsTeamsOrParallel);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case OMPD_target:
|
||||||
|
case OMPD_target_simd: {
|
||||||
|
QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1);
|
||||||
|
QualType Args[] = {Context.VoidPtrTy.withConst().withRestrict()};
|
||||||
|
FunctionProtoType::ExtProtoInfo EPI;
|
||||||
|
EPI.Variadic = true;
|
||||||
|
QualType CopyFnType = Context.getFunctionType(Context.VoidTy, Args, EPI);
|
||||||
|
Sema::CapturedParamNameType Params[] = {
|
||||||
|
std::make_pair(".global_tid.", KmpInt32Ty),
|
||||||
|
std::make_pair(".part_id.", Context.getPointerType(KmpInt32Ty)),
|
||||||
|
std::make_pair(".privates.", Context.VoidPtrTy.withConst()),
|
||||||
|
std::make_pair(".copy_fn.",
|
||||||
|
Context.getPointerType(CopyFnType).withConst()),
|
||||||
|
std::make_pair(".task_t.", Context.VoidPtrTy.withConst()),
|
||||||
|
std::make_pair(StringRef(), QualType()) // __context with shared vars
|
||||||
|
};
|
||||||
|
ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
|
||||||
|
Params);
|
||||||
|
// Mark this captured region as inlined, because we don't use outlined
|
||||||
|
// function directly.
|
||||||
|
getCurCapturedRegion()->TheCapturedDecl->addAttr(
|
||||||
|
AlwaysInlineAttr::CreateImplicit(
|
||||||
|
Context, AlwaysInlineAttr::Keyword_forceinline, SourceRange()));
|
||||||
|
ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
|
||||||
|
std::make_pair(StringRef(), QualType()));
|
||||||
|
break;
|
||||||
|
}
|
||||||
case OMPD_simd:
|
case OMPD_simd:
|
||||||
case OMPD_for:
|
case OMPD_for:
|
||||||
case OMPD_for_simd:
|
case OMPD_for_simd:
|
||||||
|
@ -2154,9 +2196,7 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
|
||||||
case OMPD_distribute_simd:
|
case OMPD_distribute_simd:
|
||||||
case OMPD_ordered:
|
case OMPD_ordered:
|
||||||
case OMPD_atomic:
|
case OMPD_atomic:
|
||||||
case OMPD_target_data:
|
case OMPD_target_data: {
|
||||||
case OMPD_target:
|
|
||||||
case OMPD_target_simd: {
|
|
||||||
Sema::CapturedParamNameType Params[] = {
|
Sema::CapturedParamNameType Params[] = {
|
||||||
std::make_pair(StringRef(), QualType()) // __context with shared vars
|
std::make_pair(StringRef(), QualType()) // __context with shared vars
|
||||||
};
|
};
|
||||||
|
@ -2247,6 +2287,21 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
|
||||||
QualType KmpInt32PtrTy =
|
QualType KmpInt32PtrTy =
|
||||||
Context.getPointerType(KmpInt32Ty).withConst().withRestrict();
|
Context.getPointerType(KmpInt32Ty).withConst().withRestrict();
|
||||||
|
|
||||||
|
QualType Args[] = {Context.VoidPtrTy.withConst().withRestrict()};
|
||||||
|
FunctionProtoType::ExtProtoInfo EPI;
|
||||||
|
EPI.Variadic = true;
|
||||||
|
QualType CopyFnType = Context.getFunctionType(Context.VoidTy, Args, EPI);
|
||||||
|
Sema::CapturedParamNameType Params[] = {
|
||||||
|
std::make_pair(".global_tid.", KmpInt32Ty),
|
||||||
|
std::make_pair(".part_id.", Context.getPointerType(KmpInt32Ty)),
|
||||||
|
std::make_pair(".privates.", Context.VoidPtrTy.withConst()),
|
||||||
|
std::make_pair(".copy_fn.",
|
||||||
|
Context.getPointerType(CopyFnType).withConst()),
|
||||||
|
std::make_pair(".task_t.", Context.VoidPtrTy.withConst()),
|
||||||
|
std::make_pair(StringRef(), QualType()) // __context with shared vars
|
||||||
|
};
|
||||||
|
ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
|
||||||
|
Params);
|
||||||
Sema::CapturedParamNameType ParamsTarget[] = {
|
Sema::CapturedParamNameType ParamsTarget[] = {
|
||||||
std::make_pair(StringRef(), QualType()) // __context with shared vars
|
std::make_pair(StringRef(), QualType()) // __context with shared vars
|
||||||
};
|
};
|
||||||
|
@ -6354,13 +6409,23 @@ StmtResult Sema::ActOnOpenMPTargetDirective(ArrayRef<OMPClause *> Clauses,
|
||||||
// The point of exit cannot be a branch out of the structured block.
|
// The point of exit cannot be a branch out of the structured block.
|
||||||
// longjmp() and throw() must not violate the entry/exit criteria.
|
// longjmp() and throw() must not violate the entry/exit criteria.
|
||||||
CS->getCapturedDecl()->setNothrow();
|
CS->getCapturedDecl()->setNothrow();
|
||||||
|
for (int ThisCaptureLevel = getOpenMPCaptureLevels(OMPD_target);
|
||||||
|
ThisCaptureLevel > 1; --ThisCaptureLevel) {
|
||||||
|
CS = cast<CapturedStmt>(CS->getCapturedStmt());
|
||||||
|
// 1.2.2 OpenMP Language Terminology
|
||||||
|
// Structured block - An executable statement with a single entry at the
|
||||||
|
// top and a single exit at the bottom.
|
||||||
|
// The point of exit cannot be a branch out of the structured block.
|
||||||
|
// longjmp() and throw() must not violate the entry/exit criteria.
|
||||||
|
CS->getCapturedDecl()->setNothrow();
|
||||||
|
}
|
||||||
|
|
||||||
// OpenMP [2.16, Nesting of Regions]
|
// OpenMP [2.16, Nesting of Regions]
|
||||||
// If specified, a teams construct must be contained within a target
|
// If specified, a teams construct must be contained within a target
|
||||||
// construct. That target construct must contain no statements or directives
|
// construct. That target construct must contain no statements or directives
|
||||||
// outside of the teams construct.
|
// outside of the teams construct.
|
||||||
if (DSAStack->hasInnerTeamsRegion()) {
|
if (DSAStack->hasInnerTeamsRegion()) {
|
||||||
auto S = AStmt->IgnoreContainers(/*IgnoreCaptured*/ true);
|
Stmt *S = CS->IgnoreContainers(/*IgnoreCaptured=*/true);
|
||||||
bool OMPTeamsFound = true;
|
bool OMPTeamsFound = true;
|
||||||
if (auto *CS = dyn_cast<CompoundStmt>(S)) {
|
if (auto *CS = dyn_cast<CompoundStmt>(S)) {
|
||||||
auto I = CS->body_begin();
|
auto I = CS->body_begin();
|
||||||
|
@ -6407,6 +6472,16 @@ Sema::ActOnOpenMPTargetParallelDirective(ArrayRef<OMPClause *> Clauses,
|
||||||
// The point of exit cannot be a branch out of the structured block.
|
// The point of exit cannot be a branch out of the structured block.
|
||||||
// longjmp() and throw() must not violate the entry/exit criteria.
|
// longjmp() and throw() must not violate the entry/exit criteria.
|
||||||
CS->getCapturedDecl()->setNothrow();
|
CS->getCapturedDecl()->setNothrow();
|
||||||
|
for (int ThisCaptureLevel = getOpenMPCaptureLevels(OMPD_target_parallel);
|
||||||
|
ThisCaptureLevel > 1; --ThisCaptureLevel) {
|
||||||
|
CS = cast<CapturedStmt>(CS->getCapturedStmt());
|
||||||
|
// 1.2.2 OpenMP Language Terminology
|
||||||
|
// Structured block - An executable statement with a single entry at the
|
||||||
|
// top and a single exit at the bottom.
|
||||||
|
// The point of exit cannot be a branch out of the structured block.
|
||||||
|
// longjmp() and throw() must not violate the entry/exit criteria.
|
||||||
|
CS->getCapturedDecl()->setNothrow();
|
||||||
|
}
|
||||||
|
|
||||||
getCurFunction()->setHasBranchProtectedScope();
|
getCurFunction()->setHasBranchProtectedScope();
|
||||||
|
|
||||||
|
@ -8049,6 +8124,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
|
||||||
case OMPD_target_update:
|
case OMPD_target_update:
|
||||||
case OMPD_target_enter_data:
|
case OMPD_target_enter_data:
|
||||||
case OMPD_target_exit_data:
|
case OMPD_target_exit_data:
|
||||||
|
case OMPD_target:
|
||||||
CaptureRegion = OMPD_task;
|
CaptureRegion = OMPD_task;
|
||||||
break;
|
break;
|
||||||
case OMPD_target_teams:
|
case OMPD_target_teams:
|
||||||
|
@ -8057,7 +8133,6 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
|
||||||
case OMPD_target_teams_distribute_parallel_for:
|
case OMPD_target_teams_distribute_parallel_for:
|
||||||
case OMPD_target_teams_distribute_parallel_for_simd:
|
case OMPD_target_teams_distribute_parallel_for_simd:
|
||||||
case OMPD_target_data:
|
case OMPD_target_data:
|
||||||
case OMPD_target:
|
|
||||||
case OMPD_target_simd:
|
case OMPD_target_simd:
|
||||||
case OMPD_target_parallel:
|
case OMPD_target_parallel:
|
||||||
case OMPD_target_parallel_for:
|
case OMPD_target_parallel_for:
|
||||||
|
@ -11419,8 +11494,8 @@ OMPClause *Sema::ActOnOpenMPDeviceClause(Expr *Device, SourceLocation StartLoc,
|
||||||
HelperValStmt = buildPreInits(Context, Captures);
|
HelperValStmt = buildPreInits(Context, Captures);
|
||||||
}
|
}
|
||||||
|
|
||||||
return new (Context)
|
return new (Context) OMPDeviceClause(ValExpr, HelperValStmt, CaptureRegion,
|
||||||
OMPDeviceClause(ValExpr, HelperValStmt, StartLoc, LParenLoc, EndLoc);
|
StartLoc, LParenLoc, EndLoc);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool CheckTypeMappable(SourceLocation SL, SourceRange SR, Sema &SemaRef,
|
static bool CheckTypeMappable(SourceLocation SL, SourceRange SR, Sema &SemaRef,
|
||||||
|
|
|
@ -115,7 +115,9 @@ int foo(int n) {
|
||||||
static long *plocal;
|
static long *plocal;
|
||||||
|
|
||||||
// CHECK: [[ADD:%.+]] = add nsw i32
|
// CHECK: [[ADD:%.+]] = add nsw i32
|
||||||
// CHECK: [[DEVICE:%.+]] = sext i32 [[ADD]] to i64
|
// CHECK: store i32 [[ADD]], i32* [[DEVICE_CAP:%.+]],
|
||||||
|
// CHECK: [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
|
||||||
|
// CHECK: [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
|
||||||
// CHECK: [[RET:%.+]] = call i32 @__tgt_target(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null)
|
// CHECK: [[RET:%.+]] = call i32 @__tgt_target(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null)
|
||||||
// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
|
// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
|
||||||
// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
|
// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
|
||||||
|
@ -128,7 +130,9 @@ int foo(int n) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// CHECK-DAG: [[ADD:%.+]] = add nsw i32
|
// CHECK-DAG: [[ADD:%.+]] = add nsw i32
|
||||||
// CHECK-DAG: [[DEVICE:%.+]] = sext i32 [[ADD]] to i64
|
// CHECK-DAG: store i32 [[ADD]], i32* [[DEVICE_CAP:%.+]],
|
||||||
|
// CHECK-DAG: [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
|
||||||
|
// CHECK-DAG: [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
|
||||||
// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_nowait(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0)
|
// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_nowait(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0)
|
||||||
// CHECK-DAG: [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
|
// CHECK-DAG: [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
|
||||||
// CHECK-DAG: [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%[^,]+]], i32 0, i32 0
|
// CHECK-DAG: [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%[^,]+]], i32 0, i32 0
|
||||||
|
@ -234,13 +238,13 @@ int foo(int n) {
|
||||||
// CHECK-32: store i32 [[A_VAL]], i32* [[A_CADDR:%.+]],
|
// CHECK-32: store i32 [[A_VAL]], i32* [[A_CADDR:%.+]],
|
||||||
// CHECK-32: [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]],
|
// CHECK-32: [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]],
|
||||||
|
|
||||||
|
// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20
|
||||||
|
// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[IFELSE:[^,]+]]
|
||||||
|
// CHECK: [[TRY]]
|
||||||
// CHECK: [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[VLA0:%.+]], 4
|
// CHECK: [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[VLA0:%.+]], 4
|
||||||
// CHECK: [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]]
|
// CHECK: [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]]
|
||||||
// CHECK: [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8
|
// CHECK: [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8
|
||||||
|
|
||||||
// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20
|
|
||||||
// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[IFELSE:[^,]+]]
|
|
||||||
// CHECK: [[TRY]]
|
|
||||||
// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT4]], i32 0, i32 0))
|
// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT4]], i32 0, i32 0))
|
||||||
// CHECK-DAG: [[BPR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
|
// CHECK-DAG: [[BPR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
|
||||||
// CHECK-DAG: [[PR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P:%[^,]+]], i32 0, i32 0
|
// CHECK-DAG: [[PR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P:%[^,]+]], i32 0, i32 0
|
||||||
|
@ -510,13 +514,13 @@ int bar(int n){
|
||||||
// CHECK-32: store i32 %{{.+}}, i32* [[B_ADDR:%.+]],
|
// CHECK-32: store i32 %{{.+}}, i32* [[B_ADDR:%.+]],
|
||||||
// CHECK-32: [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_ADDR]],
|
// CHECK-32: [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_ADDR]],
|
||||||
|
|
||||||
|
// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60
|
||||||
|
// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[IFELSE:[^,]+]]
|
||||||
|
// CHECK: [[TRY]]
|
||||||
// We capture 2 VLA sizes in this target region
|
// We capture 2 VLA sizes in this target region
|
||||||
// CHECK: [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]]
|
// CHECK: [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]]
|
||||||
// CHECK: [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2
|
// CHECK: [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2
|
||||||
|
|
||||||
// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60
|
|
||||||
// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[IFELSE:[^,]+]]
|
|
||||||
// CHECK: [[TRY]]
|
|
||||||
// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT7]], i32 0, i32 0))
|
// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT7]], i32 0, i32 0))
|
||||||
// CHECK-DAG: [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0
|
// CHECK-DAG: [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0
|
||||||
// CHECK-DAG: [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0
|
// CHECK-DAG: [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0
|
||||||
|
|
|
@ -0,0 +1,261 @@
|
||||||
|
// Test host codegen.
|
||||||
|
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
|
||||||
|
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
|
||||||
|
// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
|
||||||
|
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
|
||||||
|
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
|
||||||
|
// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
|
||||||
|
|
||||||
|
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
|
||||||
|
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
|
||||||
|
// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
|
||||||
|
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
|
||||||
|
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
|
||||||
|
// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
|
||||||
|
// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
|
||||||
|
|
||||||
|
// Test target codegen - host bc file has to be created first.
|
||||||
|
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
|
||||||
|
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
|
||||||
|
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
|
||||||
|
// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
|
||||||
|
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
|
||||||
|
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
|
||||||
|
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
|
||||||
|
// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
|
||||||
|
|
||||||
|
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
|
||||||
|
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
|
||||||
|
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
|
||||||
|
// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
|
||||||
|
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
|
||||||
|
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
|
||||||
|
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
|
||||||
|
// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
|
||||||
|
// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
|
||||||
|
|
||||||
|
// expected-no-diagnostics
|
||||||
|
#ifndef HEADER
|
||||||
|
#define HEADER
|
||||||
|
|
||||||
|
// CHECK-DAG: [[TT:%.+]] = type { i64, i8 }
|
||||||
|
// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 }
|
||||||
|
// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* }
|
||||||
|
// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* }
|
||||||
|
|
||||||
|
// TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i{{32|64}}, i32, i32 }
|
||||||
|
|
||||||
|
// CHECK-DAG: $[[REGFN:\.omp_offloading\..+]] = comdat
|
||||||
|
|
||||||
|
// CHECK-DAG: [[SIZET:@.+]] = private unnamed_addr constant [2 x i[[SZ]]] [i[[SZ]] 0, i[[SZ]] 4]
|
||||||
|
// CHECK-DAG: [[MAPT:@.+]] = private unnamed_addr constant [2 x i64] [i64 32, i64 288]
|
||||||
|
// CHECK-DAG: @{{.*}} = private constant i8 0
|
||||||
|
|
||||||
|
// TCHECK: @{{.+}} = constant [[ENTTY]]
|
||||||
|
// TCHECK: @{{.+}} = {{.*}}constant [[ENTTY]]
|
||||||
|
// TCHECK-NOT: @{{.+}} = constant [[ENTTY]]
|
||||||
|
|
||||||
|
// Check if offloading descriptor is created.
|
||||||
|
// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]]
|
||||||
|
// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]]
|
||||||
|
// CHECK: [[DEVBEGIN:@.+]] = external constant i8
|
||||||
|
// CHECK: [[DEVEND:@.+]] = external constant i8
|
||||||
|
// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }], comdat($[[REGFN]])
|
||||||
|
// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }, comdat($[[REGFN]])
|
||||||
|
|
||||||
|
// Check target registration is registered as a Ctor.
|
||||||
|
// CHECK: appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* @[[REGFN]] to void ()*), i8* bitcast (void (i8*)* @[[REGFN]] to i8*) }]
|
||||||
|
|
||||||
|
|
||||||
|
template<typename tx, typename ty>
|
||||||
|
struct TT{
|
||||||
|
tx X;
|
||||||
|
ty Y;
|
||||||
|
};
|
||||||
|
|
||||||
|
int global;
|
||||||
|
extern int global;
|
||||||
|
|
||||||
|
// CHECK: define {{.*}}[[FOO:@.+]](
|
||||||
|
int foo(int n) {
|
||||||
|
int a = 0;
|
||||||
|
short aa = 0;
|
||||||
|
float b[10];
|
||||||
|
float bn[n];
|
||||||
|
double c[5][10];
|
||||||
|
double cn[5][n];
|
||||||
|
TT<long long, char> d;
|
||||||
|
static long *plocal;
|
||||||
|
|
||||||
|
// CHECK: [[ADD:%.+]] = add nsw i32
|
||||||
|
// CHECK: store i32 [[ADD]], i32* [[DEVICE_CAP:%.+]],
|
||||||
|
// CHECK: [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0
|
||||||
|
// CHECK: [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
|
||||||
|
// CHECK: store i32 [[DEV]], i32* [[GEP]],
|
||||||
|
// CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* @0, i32 [[GTID:%.+]], i32 1, i[[SZ]] {{20|40}}, i[[SZ]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY0:@.+]] to i32 (i32, i8*)*))
|
||||||
|
// CHECK: [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY0:%.+]]*
|
||||||
|
// CHECK: getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
|
||||||
|
// CHECK: getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 1
|
||||||
|
// CHECK: getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 2
|
||||||
|
// CHECK: getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 3
|
||||||
|
// CHECK: [[DEP_START:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
|
||||||
|
// CHECK: [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
|
||||||
|
// CHECK: call void @__kmpc_omp_wait_deps(%ident_t* @0, i32 [[GTID]], i32 4, i8* [[DEP]], i32 0, i8* null)
|
||||||
|
// CHECK: call void @__kmpc_omp_task_begin_if0(%ident_t* @0, i32 [[GTID]], i8* [[TASK]])
|
||||||
|
// CHECK: call i32 [[TASK_ENTRY0]](i32 [[GTID]], [[TASK_TY0]]* [[BC_TASK]])
|
||||||
|
// CHECK: call void @__kmpc_omp_task_complete_if0(%ident_t* @0, i32 [[GTID]], i8* [[TASK]])
|
||||||
|
#pragma omp target device(global + a) depend(in: global) depend(out: a, b, cn[4])
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
// CHECK: [[ADD:%.+]] = add nsw i32
|
||||||
|
// CHECK: store i32 [[ADD]], i32* [[DEVICE_CAP:%.+]],
|
||||||
|
|
||||||
|
// CHECK: [[BOOL:%.+]] = icmp ne i32 %{{.+}}, 0
|
||||||
|
// CHECK: br i1 [[BOOL]], label %[[THEN:.+]], label %[[ELSE:.+]]
|
||||||
|
// CHECK: [[THEN]]:
|
||||||
|
// CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%.+]], i32 0, i32 0
|
||||||
|
// CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%.+]], i32 0, i32 0
|
||||||
|
// CHECK-DAG: [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]**
|
||||||
|
// CHECK-DAG: [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]**
|
||||||
|
// CHECK-DAG: store i[[SZ]]* [[BP0:%[^,]+]], i[[SZ]]** [[CBPADDR0]]
|
||||||
|
// CHECK-DAG: store i[[SZ]]* [[BP0]], i[[SZ]]** [[CPADDR0]]
|
||||||
|
|
||||||
|
// CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 1
|
||||||
|
// CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 1
|
||||||
|
// CHECK-DAG: [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]*
|
||||||
|
// CHECK-DAG: [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]*
|
||||||
|
// CHECK-DAG: store i[[SZ]] [[BP1:%[^,]+]], i[[SZ]]* [[CBPADDR1]]
|
||||||
|
// CHECK-DAG: store i[[SZ]] [[BP1]], i[[SZ]]* [[CPADDR1]]
|
||||||
|
// CHECK-DAG: getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 0
|
||||||
|
// CHECK-DAG: getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0
|
||||||
|
// CHECK: [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
|
||||||
|
// CHECK: [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
|
||||||
|
// CHECK: store i32 [[DEV]], i32* [[GEP]],
|
||||||
|
|
||||||
|
// CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* @0, i32 [[GTID]], i32 1, i[[SZ]] {{104|52}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*))
|
||||||
|
// CHECK: [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1_:%.+]]*
|
||||||
|
// CHECK: getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
|
||||||
|
// CHECK: getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 1
|
||||||
|
// CHECK: getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 2
|
||||||
|
// CHECK: [[DEP_START:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
|
||||||
|
// CHECK: [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
|
||||||
|
// CHECK: call i32 @__kmpc_omp_task_with_deps(%ident_t* @0, i32 [[GTID]], i8* [[TASK]], i32 3, i8* [[DEP]], i32 0, i8* null)
|
||||||
|
// CHECK: br label %[[EXIT:.+]]
|
||||||
|
|
||||||
|
// CHECK: [[ELSE]]:
|
||||||
|
// CHECK-NOT: getelementptr inbounds [2 x i8*], [2 x i8*]*
|
||||||
|
// CHECK: [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
|
||||||
|
// CHECK: [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
|
||||||
|
// CHECK: store i32 [[DEV]], i32* [[GEP]],
|
||||||
|
|
||||||
|
// CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* @0, i32 [[GTID]], i32 1, i[[SZ]] {{56|28}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1__:@.+]] to i32 (i32, i8*)*))
|
||||||
|
// CHECK: [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1__:%.+]]*
|
||||||
|
// CHECK: getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
|
||||||
|
// CHECK: getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 1
|
||||||
|
// CHECK: getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 2
|
||||||
|
// CHECK: [[DEP_START:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
|
||||||
|
// CHECK: [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
|
||||||
|
// CHECK: call i32 @__kmpc_omp_task_with_deps(%ident_t* @0, i32 [[GTID]], i8* [[TASK]], i32 3, i8* [[DEP]], i32 0, i8* null)
|
||||||
|
// CHECK: br label %[[EXIT:.+]]
|
||||||
|
// CHECK: [[EXIT]]:
|
||||||
|
|
||||||
|
#pragma omp target device(global + a) nowait depend(inout: global, a, bn) if(a)
|
||||||
|
{
|
||||||
|
static int local1;
|
||||||
|
*plocal = global;
|
||||||
|
local1 = global;
|
||||||
|
}
|
||||||
|
|
||||||
|
// CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* @0, i32 [[GTID]], i32 1, i[[SZ]] {{48|24}}, i[[SZ]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*))
|
||||||
|
// CHECK: [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY2:%.+]]*
|
||||||
|
// CHECK: getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
|
||||||
|
// CHECK: [[DEP_START:%.+]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
|
||||||
|
// CHECK: [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
|
||||||
|
// CHECK: call void @__kmpc_omp_wait_deps(%ident_t* @0, i32 [[GTID]], i32 1, i8* [[DEP]], i32 0, i8* null)
|
||||||
|
// CHECK: call void @__kmpc_omp_task_begin_if0(%ident_t* @0, i32 [[GTID]], i8* [[TASK]])
|
||||||
|
// CHECK: call i32 [[TASK_ENTRY2]](i32 [[GTID]], [[TASK_TY2]]* [[BC_TASK]])
|
||||||
|
// CHECK: call void @__kmpc_omp_task_complete_if0(%ident_t* @0, i32 [[GTID]], i8* [[TASK]])
|
||||||
|
#pragma omp target if(0) firstprivate(global) depend(out:global)
|
||||||
|
{
|
||||||
|
global += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check that the offloading functions are emitted and that the arguments are
|
||||||
|
// correct and loaded correctly for the target regions in foo().
|
||||||
|
|
||||||
|
// CHECK: define internal void [[HVT0:@.+]]()
|
||||||
|
|
||||||
|
// CHECK: define internal{{.*}} i32 [[TASK_ENTRY0]](i32{{.*}}, [[TASK_TY0]]* noalias)
|
||||||
|
// CHECK: store void (i8*, ...)* null, void (i8*, ...)** %
|
||||||
|
// CHECK: [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0
|
||||||
|
// CHECK: [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
|
||||||
|
// CHECK: [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
|
||||||
|
// CHECK: [[RET:%.+]] = call i32 @__tgt_target(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null)
|
||||||
|
// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
|
||||||
|
// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
|
||||||
|
// CHECK: [[FAIL]]
|
||||||
|
// CHECK: call void [[HVT0]]()
|
||||||
|
// CHECK-NEXT: br label %[[END]]
|
||||||
|
// CHECK: [[END]]
|
||||||
|
// CHECK: ret i32 0
|
||||||
|
|
||||||
|
// CHECK: define internal void [[HVT1:@.+]](i[[SZ]]* %{{.+}}, i[[SZ]] %{{.+}})
|
||||||
|
|
||||||
|
// CHECK: define internal{{.*}} i32 [[TASK_ENTRY1_]](i32{{.*}}, [[TASK_TY1_]]* noalias)
|
||||||
|
// CHECK: call void (i8*, ...) %
|
||||||
|
// CHECK: [[SZT:%.+]] = getelementptr inbounds [2 x i[[SZ]]], [2 x i[[SZ]]]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
|
||||||
|
// CHECK: [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
|
||||||
|
// CHECK: [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
|
||||||
|
// CHECK: [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
|
||||||
|
// CHECK: [[RET:%.+]] = call i32 @__tgt_target_nowait(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SZT]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0)
|
||||||
|
|
||||||
|
// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
|
||||||
|
// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
|
||||||
|
// CHECK: [[FAIL]]
|
||||||
|
// CHECK: [[BP0:%.+]] = load i[[SZ]]*, i[[SZ]]** %
|
||||||
|
// CHECK: [[BP1_I32:%.+]] = load i32, i32* %
|
||||||
|
// CHECK-64: [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32*
|
||||||
|
// CHECK-64: store i32 [[BP1_I32]], i32* [[BP1_CAST]],
|
||||||
|
// CHECK-32: store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]],
|
||||||
|
// CHECK: [[BP1:%.+]] = load i[[SZ]], i[[SZ]]* [[BP1_PTR]],
|
||||||
|
// CHECK: call void [[HVT1]](i[[SZ]]* [[BP0]], i[[SZ]] [[BP1]])
|
||||||
|
// CHECK-NEXT: br label %[[END]]
|
||||||
|
// CHECK: [[END]]
|
||||||
|
// CHECK: ret i32 0
|
||||||
|
|
||||||
|
// CHECK: define internal{{.*}} i32 [[TASK_ENTRY1__]](i32{{.*}}, [[TASK_TY1__]]* noalias)
|
||||||
|
// CHECK: call void (i8*, ...) %
|
||||||
|
// CHECK: [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
|
||||||
|
// CHECK: [[BP0:%.+]] = load i[[SZ]]*, i[[SZ]]** %
|
||||||
|
// CHECK: [[BP1_I32:%.+]] = load i32, i32* %
|
||||||
|
// CHECK-64: [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32*
|
||||||
|
// CHECK-64: store i32 [[BP1_I32]], i32* [[BP1_CAST]],
|
||||||
|
// CHECK-32: store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]],
|
||||||
|
// CHECK: [[BP1:%.+]] = load i[[SZ]], i[[SZ]]* [[BP1_PTR]],
|
||||||
|
// CHECK: call void [[HVT1]](i[[SZ]]* [[BP0]], i[[SZ]] [[BP1]])
|
||||||
|
// CHECK: ret i32 0
|
||||||
|
|
||||||
|
// CHECK: define internal void [[HVT2:@.+]](i[[SZ]] %{{.+}})
|
||||||
|
// Create stack storage and store argument in there.
|
||||||
|
// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align
|
||||||
|
// CHECK: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
|
||||||
|
// CHECK-64: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32*
|
||||||
|
// CHECK-64: load i32, i32* [[AA_CADDR]], align
|
||||||
|
// CHECK-32: load i32, i32* [[AA_ADDR]], align
|
||||||
|
|
||||||
|
// CHECK: define internal{{.*}} i32 [[TASK_ENTRY2]](i32{{.*}}, [[TASK_TY2]]* noalias)
|
||||||
|
// CHECK: call void (i8*, ...) %
|
||||||
|
// CHECK: [[BP1_I32:%.+]] = load i32, i32* %
|
||||||
|
// CHECK-64: [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32*
|
||||||
|
// CHECK-64: store i32 [[BP1_I32]], i32* [[BP1_CAST]],
|
||||||
|
// CHECK-32: store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]],
|
||||||
|
// CHECK: [[BP1:%.+]] = load i[[SZ]], i[[SZ]]* [[BP1_PTR]],
|
||||||
|
// CHECK: call void [[HVT2]](i[[SZ]] [[BP1]])
|
||||||
|
// CHECK: ret i32 0
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
|
@ -197,13 +197,13 @@ int foo(int n) {
|
||||||
// CHECK-32: store i32 [[A_VAL]], i32* [[A_CADDR:%.+]],
|
// CHECK-32: store i32 [[A_VAL]], i32* [[A_CADDR:%.+]],
|
||||||
// CHECK-32: [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]],
|
// CHECK-32: [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]],
|
||||||
|
|
||||||
|
// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20
|
||||||
|
// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
|
||||||
|
// CHECK: [[TRY]]
|
||||||
// CHECK: [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[VLA0:%.+]], 4
|
// CHECK: [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[VLA0:%.+]], 4
|
||||||
// CHECK: [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]]
|
// CHECK: [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]]
|
||||||
// CHECK: [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8
|
// CHECK: [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8
|
||||||
|
|
||||||
// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20
|
|
||||||
// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
|
|
||||||
// CHECK: [[TRY]]
|
|
||||||
// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT4]], i32 0, i32 0), i32 1, i32 0)
|
// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT4]], i32 0, i32 0), i32 1, i32 0)
|
||||||
// CHECK-DAG: [[BPR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
|
// CHECK-DAG: [[BPR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
|
||||||
// CHECK-DAG: [[PR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P:%[^,]+]], i32 0, i32 0
|
// CHECK-DAG: [[PR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P:%[^,]+]], i32 0, i32 0
|
||||||
|
@ -531,13 +531,13 @@ int bar(int n){
|
||||||
// CHECK-32: store i32 %{{.+}}, i32* [[B_ADDR:%.+]],
|
// CHECK-32: store i32 %{{.+}}, i32* [[B_ADDR:%.+]],
|
||||||
// CHECK-32: [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_ADDR]],
|
// CHECK-32: [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_ADDR]],
|
||||||
|
|
||||||
|
// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60
|
||||||
|
// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
|
||||||
|
// CHECK: [[TRY]]
|
||||||
// We capture 2 VLA sizes in this target region
|
// We capture 2 VLA sizes in this target region
|
||||||
// CHECK: [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]]
|
// CHECK: [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]]
|
||||||
// CHECK: [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2
|
// CHECK: [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2
|
||||||
|
|
||||||
// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60
|
|
||||||
// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
|
|
||||||
// CHECK: [[TRY]]
|
|
||||||
// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT7]], i32 0, i32 0), i32 1, i32 0)
|
// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT7]], i32 0, i32 0), i32 1, i32 0)
|
||||||
// CHECK-DAG: [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0
|
// CHECK-DAG: [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0
|
||||||
// CHECK-DAG: [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0
|
// CHECK-DAG: [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0
|
||||||
|
|
|
@ -218,13 +218,13 @@ int foo(int n) {
|
||||||
// CHECK-32: store i32 [[A_VAL]], i32* [[A_CADDR:%.+]],
|
// CHECK-32: store i32 [[A_VAL]], i32* [[A_CADDR:%.+]],
|
||||||
// CHECK-32: [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]],
|
// CHECK-32: [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]],
|
||||||
|
|
||||||
|
// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20
|
||||||
|
// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
|
||||||
|
// CHECK: [[TRY]]
|
||||||
// CHECK: [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[VLA0:%.+]], 4
|
// CHECK: [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[VLA0:%.+]], 4
|
||||||
// CHECK: [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]]
|
// CHECK: [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]]
|
||||||
// CHECK: [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8
|
// CHECK: [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8
|
||||||
|
|
||||||
// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20
|
|
||||||
// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
|
|
||||||
// CHECK: [[TRY]]
|
|
||||||
// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 10, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* [[MAPT4]], i32 0, i32 0), i32 1, i32 0)
|
// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 10, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* [[MAPT4]], i32 0, i32 0), i32 1, i32 0)
|
||||||
// CHECK-DAG: [[BPR]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
|
// CHECK-DAG: [[BPR]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
|
||||||
// CHECK-DAG: [[PR]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P:%[^,]+]], i32 0, i32 0
|
// CHECK-DAG: [[PR]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P:%[^,]+]], i32 0, i32 0
|
||||||
|
@ -558,13 +558,13 @@ int bar(int n){
|
||||||
// CHECK-32: store i32 %{{.+}}, i32* [[B_ADDR:%.+]],
|
// CHECK-32: store i32 %{{.+}}, i32* [[B_ADDR:%.+]],
|
||||||
// CHECK-32: [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_ADDR]],
|
// CHECK-32: [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_ADDR]],
|
||||||
|
|
||||||
|
// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60
|
||||||
|
// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
|
||||||
|
// CHECK: [[TRY]]
|
||||||
// We capture 2 VLA sizes in this target region
|
// We capture 2 VLA sizes in this target region
|
||||||
// CHECK: [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]]
|
// CHECK: [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]]
|
||||||
// CHECK: [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2
|
// CHECK: [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2
|
||||||
|
|
||||||
// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60
|
|
||||||
// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
|
|
||||||
// CHECK: [[TRY]]
|
|
||||||
// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT7]], i32 0, i32 0), i32 1, i32 0)
|
// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT7]], i32 0, i32 0), i32 1, i32 0)
|
||||||
// CHECK-DAG: [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0
|
// CHECK-DAG: [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0
|
||||||
// CHECK-DAG: [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0
|
// CHECK-DAG: [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0
|
||||||
|
|
|
@ -215,13 +215,13 @@ int foo(int n) {
|
||||||
// CHECK-32: store i32 [[A_VAL]], i32* [[A_CADDR:%.+]],
|
// CHECK-32: store i32 [[A_VAL]], i32* [[A_CADDR:%.+]],
|
||||||
// CHECK-32: [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]],
|
// CHECK-32: [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]],
|
||||||
|
|
||||||
|
// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20
|
||||||
|
// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
|
||||||
|
// CHECK: [[TRY]]
|
||||||
// CHECK: [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[VLA0:%.+]], 4
|
// CHECK: [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[VLA0:%.+]], 4
|
||||||
// CHECK: [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]]
|
// CHECK: [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]]
|
||||||
// CHECK: [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8
|
// CHECK: [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8
|
||||||
|
|
||||||
// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20
|
|
||||||
// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
|
|
||||||
// CHECK: [[TRY]]
|
|
||||||
// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 10, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* [[MAPT4]], i32 0, i32 0), i32 1, i32 0)
|
// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 10, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* [[MAPT4]], i32 0, i32 0), i32 1, i32 0)
|
||||||
// CHECK-DAG: [[BPR]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
|
// CHECK-DAG: [[BPR]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
|
||||||
// CHECK-DAG: [[PR]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P:%[^,]+]], i32 0, i32 0
|
// CHECK-DAG: [[PR]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P:%[^,]+]], i32 0, i32 0
|
||||||
|
@ -558,13 +558,13 @@ int bar(int n){
|
||||||
// CHECK-32: store i32 %{{.+}}, i32* [[B_ADDR:%.+]],
|
// CHECK-32: store i32 %{{.+}}, i32* [[B_ADDR:%.+]],
|
||||||
// CHECK-32: [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_ADDR]],
|
// CHECK-32: [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_ADDR]],
|
||||||
|
|
||||||
|
// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60
|
||||||
|
// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
|
||||||
|
// CHECK: [[TRY]]
|
||||||
// We capture 2 VLA sizes in this target region
|
// We capture 2 VLA sizes in this target region
|
||||||
// CHECK: [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]]
|
// CHECK: [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]]
|
||||||
// CHECK: [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2
|
// CHECK: [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2
|
||||||
|
|
||||||
// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60
|
|
||||||
// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
|
|
||||||
// CHECK: [[TRY]]
|
|
||||||
// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT7]], i32 0, i32 0), i32 1, i32 0)
|
// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT7]], i32 0, i32 0), i32 1, i32 0)
|
||||||
// CHECK-DAG: [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0
|
// CHECK-DAG: [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0
|
||||||
// CHECK-DAG: [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0
|
// CHECK-DAG: [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0
|
||||||
|
|
|
@ -210,13 +210,13 @@ int foo(int n) {
|
||||||
// CHECK-32: store i32 [[A_VAL]], i32* [[A_CADDR:%.+]],
|
// CHECK-32: store i32 [[A_VAL]], i32* [[A_CADDR:%.+]],
|
||||||
// CHECK-32: [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]],
|
// CHECK-32: [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]],
|
||||||
|
|
||||||
|
// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20
|
||||||
|
// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
|
||||||
|
// CHECK: [[TRY]]
|
||||||
// CHECK: [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[VLA0:%.+]], 4
|
// CHECK: [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[VLA0:%.+]], 4
|
||||||
// CHECK: [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]]
|
// CHECK: [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]]
|
||||||
// CHECK: [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8
|
// CHECK: [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8
|
||||||
|
|
||||||
// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20
|
|
||||||
// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
|
|
||||||
// CHECK: [[TRY]]
|
|
||||||
// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT4]], i32 0, i32 0))
|
// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT4]], i32 0, i32 0))
|
||||||
// CHECK-DAG: [[BPR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
|
// CHECK-DAG: [[BPR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
|
||||||
// CHECK-DAG: [[PR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P:%[^,]+]], i32 0, i32 0
|
// CHECK-DAG: [[PR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P:%[^,]+]], i32 0, i32 0
|
||||||
|
@ -481,13 +481,13 @@ int bar(int n){
|
||||||
// CHECK-32: store i32 %{{.+}}, i32* [[B_ADDR:%.+]],
|
// CHECK-32: store i32 %{{.+}}, i32* [[B_ADDR:%.+]],
|
||||||
// CHECK-32: [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_ADDR]],
|
// CHECK-32: [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_ADDR]],
|
||||||
|
|
||||||
|
// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60
|
||||||
|
// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
|
||||||
|
// CHECK: [[TRY]]
|
||||||
// We capture 2 VLA sizes in this target region
|
// We capture 2 VLA sizes in this target region
|
||||||
// CHECK: [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]]
|
// CHECK: [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]]
|
||||||
// CHECK: [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2
|
// CHECK: [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2
|
||||||
|
|
||||||
// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60
|
|
||||||
// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
|
|
||||||
// CHECK: [[TRY]]
|
|
||||||
// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT7]], i32 0, i32 0))
|
// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT7]], i32 0, i32 0))
|
||||||
// CHECK-DAG: [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0
|
// CHECK-DAG: [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0
|
||||||
// CHECK-DAG: [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0
|
// CHECK-DAG: [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0
|
||||||
|
|
|
@ -220,13 +220,13 @@ int foo(int n) {
|
||||||
// CHECK-32: store i32 [[A_VAL]], i32* [[A_CADDR:%.+]],
|
// CHECK-32: store i32 [[A_VAL]], i32* [[A_CADDR:%.+]],
|
||||||
// CHECK-32: [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]],
|
// CHECK-32: [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]],
|
||||||
|
|
||||||
|
// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20
|
||||||
|
// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
|
||||||
|
// CHECK: [[TRY]]
|
||||||
// CHECK: [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[VLA0:%.+]], 4
|
// CHECK: [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[VLA0:%.+]], 4
|
||||||
// CHECK: [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]]
|
// CHECK: [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]]
|
||||||
// CHECK: [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8
|
// CHECK: [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8
|
||||||
|
|
||||||
// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20
|
|
||||||
// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
|
|
||||||
// CHECK: [[TRY]]
|
|
||||||
// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT4]], i32 0, i32 0), i32 0, i32 0)
|
// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT4]], i32 0, i32 0), i32 0, i32 0)
|
||||||
// CHECK-DAG: [[BPR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
|
// CHECK-DAG: [[BPR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
|
||||||
// CHECK-DAG: [[PR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P:%[^,]+]], i32 0, i32 0
|
// CHECK-DAG: [[PR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P:%[^,]+]], i32 0, i32 0
|
||||||
|
@ -556,13 +556,13 @@ int bar(int n){
|
||||||
// CHECK-32: store i32 %{{.+}}, i32* [[B_ADDR:%.+]],
|
// CHECK-32: store i32 %{{.+}}, i32* [[B_ADDR:%.+]],
|
||||||
// CHECK-32: [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_ADDR]],
|
// CHECK-32: [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_ADDR]],
|
||||||
|
|
||||||
|
// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60
|
||||||
|
// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
|
||||||
|
// CHECK: [[TRY]]
|
||||||
// We capture 2 VLA sizes in this target region
|
// We capture 2 VLA sizes in this target region
|
||||||
// CHECK: [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]]
|
// CHECK: [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]]
|
||||||
// CHECK: [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2
|
// CHECK: [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2
|
||||||
|
|
||||||
// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60
|
|
||||||
// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
|
|
||||||
// CHECK: [[TRY]]
|
|
||||||
// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT7]], i32 0, i32 0), i32 0, i32 0)
|
// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT7]], i32 0, i32 0), i32 0, i32 0)
|
||||||
// CHECK-DAG: [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0
|
// CHECK-DAG: [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0
|
||||||
// CHECK-DAG: [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0
|
// CHECK-DAG: [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0
|
||||||
|
|
|
@ -221,13 +221,13 @@ int foo(int n) {
|
||||||
// CHECK-32: store i32 [[A_VAL]], i32* [[A_CADDR:%.+]],
|
// CHECK-32: store i32 [[A_VAL]], i32* [[A_CADDR:%.+]],
|
||||||
// CHECK-32: [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]],
|
// CHECK-32: [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]],
|
||||||
|
|
||||||
|
// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20
|
||||||
|
// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
|
||||||
|
// CHECK: [[TRY]]
|
||||||
// CHECK: [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[VLA0:%.+]], 4
|
// CHECK: [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[VLA0:%.+]], 4
|
||||||
// CHECK: [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]]
|
// CHECK: [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]]
|
||||||
// CHECK: [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8
|
// CHECK: [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8
|
||||||
|
|
||||||
// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20
|
|
||||||
// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
|
|
||||||
// CHECK: [[TRY]]
|
|
||||||
// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 10, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* [[MAPT4]], i32 0, i32 0), i32 0, i32 0)
|
// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 10, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* [[MAPT4]], i32 0, i32 0), i32 0, i32 0)
|
||||||
// CHECK-DAG: [[BPR]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
|
// CHECK-DAG: [[BPR]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
|
||||||
// CHECK-DAG: [[PR]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P:%[^,]+]], i32 0, i32 0
|
// CHECK-DAG: [[PR]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P:%[^,]+]], i32 0, i32 0
|
||||||
|
@ -567,13 +567,13 @@ int bar(int n){
|
||||||
// CHECK-32: store i32 %{{.+}}, i32* [[B_ADDR:%.+]],
|
// CHECK-32: store i32 %{{.+}}, i32* [[B_ADDR:%.+]],
|
||||||
// CHECK-32: [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_ADDR]],
|
// CHECK-32: [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_ADDR]],
|
||||||
|
|
||||||
|
// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60
|
||||||
|
// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
|
||||||
|
// CHECK: [[TRY]]
|
||||||
// We capture 2 VLA sizes in this target region
|
// We capture 2 VLA sizes in this target region
|
||||||
// CHECK: [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]]
|
// CHECK: [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]]
|
||||||
// CHECK: [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2
|
// CHECK: [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2
|
||||||
|
|
||||||
// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60
|
|
||||||
// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
|
|
||||||
// CHECK: [[TRY]]
|
|
||||||
// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT5]], i32 0, i32 0), i32 0, i32 0)
|
// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT5]], i32 0, i32 0), i32 0, i32 0)
|
||||||
// CHECK-DAG: [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0
|
// CHECK-DAG: [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0
|
||||||
// CHECK-DAG: [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0
|
// CHECK-DAG: [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0
|
||||||
|
|
|
@ -220,13 +220,13 @@ int foo(int n) {
|
||||||
// CHECK-32: store i32 [[A_VAL]], i32* [[A_CADDR:%.+]],
|
// CHECK-32: store i32 [[A_VAL]], i32* [[A_CADDR:%.+]],
|
||||||
// CHECK-32: [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]],
|
// CHECK-32: [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]],
|
||||||
|
|
||||||
|
// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20
|
||||||
|
// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
|
||||||
|
// CHECK: [[TRY]]
|
||||||
// CHECK: [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[VLA0:%.+]], 4
|
// CHECK: [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[VLA0:%.+]], 4
|
||||||
// CHECK: [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]]
|
// CHECK: [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]]
|
||||||
// CHECK: [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8
|
// CHECK: [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8
|
||||||
|
|
||||||
// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20
|
|
||||||
// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
|
|
||||||
// CHECK: [[TRY]]
|
|
||||||
// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT4]], i32 0, i32 0), i32 0, i32 0)
|
// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT4]], i32 0, i32 0), i32 0, i32 0)
|
||||||
// CHECK-DAG: [[BPR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
|
// CHECK-DAG: [[BPR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
|
||||||
// CHECK-DAG: [[PR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P:%[^,]+]], i32 0, i32 0
|
// CHECK-DAG: [[PR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P:%[^,]+]], i32 0, i32 0
|
||||||
|
@ -556,13 +556,13 @@ int bar(int n){
|
||||||
// CHECK-32: store i32 %{{.+}}, i32* [[B_ADDR:%.+]],
|
// CHECK-32: store i32 %{{.+}}, i32* [[B_ADDR:%.+]],
|
||||||
// CHECK-32: [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_ADDR]],
|
// CHECK-32: [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_ADDR]],
|
||||||
|
|
||||||
|
// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60
|
||||||
|
// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
|
||||||
|
// CHECK: [[TRY]]
|
||||||
// We capture 2 VLA sizes in this target region
|
// We capture 2 VLA sizes in this target region
|
||||||
// CHECK: [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]]
|
// CHECK: [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]]
|
||||||
// CHECK: [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2
|
// CHECK: [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2
|
||||||
|
|
||||||
// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60
|
|
||||||
// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
|
|
||||||
// CHECK: [[TRY]]
|
|
||||||
// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT5]], i32 0, i32 0), i32 0, i32 0)
|
// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT5]], i32 0, i32 0), i32 0, i32 0)
|
||||||
// CHECK-DAG: [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0
|
// CHECK-DAG: [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0
|
||||||
// CHECK-DAG: [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0
|
// CHECK-DAG: [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0
|
||||||
|
|
Loading…
Reference in New Issue