forked from OSchip/llvm-project
[OPENMP][NVPTX] Add support for lightweight runtime.
If the target construct can be executed in SPMD mode + it is a loop based directive with static scheduling, we can use lightweight runtime support. llvm-svn: 340953
This commit is contained in:
parent
0f29e953b7
commit
8d8e1235ab
|
@ -672,11 +672,19 @@ static bool hasParallelIfNumThreadsClause(ASTContext &Ctx,
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Checks if the directive is the distribute clause with the lastprivate
|
||||||
|
/// clauses. This construct does not support SPMD execution mode.
|
||||||
|
static bool hasDistributeWithLastprivateClauses(const OMPExecutableDirective &D) {
|
||||||
|
return isOpenMPDistributeDirective(D.getDirectiveKind()) &&
|
||||||
|
D.hasClausesOfKind<OMPLastprivateClause>();
|
||||||
|
}
|
||||||
|
|
||||||
/// Check for inner (nested) SPMD construct, if any
|
/// Check for inner (nested) SPMD construct, if any
|
||||||
static bool hasNestedSPMDDirective(ASTContext &Ctx,
|
static bool hasNestedSPMDDirective(ASTContext &Ctx,
|
||||||
const OMPExecutableDirective &D) {
|
const OMPExecutableDirective &D) {
|
||||||
const auto *CS = D.getInnermostCapturedStmt();
|
const auto *CS = D.getInnermostCapturedStmt();
|
||||||
const auto *Body = CS->getCapturedStmt()->IgnoreContainers();
|
const auto *Body =
|
||||||
|
CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
|
||||||
const Stmt *ChildStmt = getSingleCompoundChild(Body);
|
const Stmt *ChildStmt = getSingleCompoundChild(Body);
|
||||||
|
|
||||||
if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
|
if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
|
||||||
|
@ -684,57 +692,33 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
|
||||||
switch (D.getDirectiveKind()) {
|
switch (D.getDirectiveKind()) {
|
||||||
case OMPD_target:
|
case OMPD_target:
|
||||||
if (isOpenMPParallelDirective(DKind) &&
|
if (isOpenMPParallelDirective(DKind) &&
|
||||||
!hasParallelIfNumThreadsClause(Ctx, *NestedDir))
|
!hasParallelIfNumThreadsClause(Ctx, *NestedDir) &&
|
||||||
|
!hasDistributeWithLastprivateClauses(*NestedDir))
|
||||||
return true;
|
return true;
|
||||||
if (DKind == OMPD_teams || DKind == OMPD_teams_distribute) {
|
if (DKind == OMPD_teams) {
|
||||||
Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
|
Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
|
||||||
|
/*IgnoreCaptured=*/true);
|
||||||
if (!Body)
|
if (!Body)
|
||||||
return false;
|
return false;
|
||||||
ChildStmt = getSingleCompoundChild(Body);
|
ChildStmt = getSingleCompoundChild(Body);
|
||||||
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
|
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
|
||||||
DKind = NND->getDirectiveKind();
|
DKind = NND->getDirectiveKind();
|
||||||
if (isOpenMPParallelDirective(DKind) &&
|
if (isOpenMPParallelDirective(DKind) &&
|
||||||
!hasParallelIfNumThreadsClause(Ctx, *NND))
|
!hasParallelIfNumThreadsClause(Ctx, *NND) &&
|
||||||
|
!hasDistributeWithLastprivateClauses(*NND))
|
||||||
return true;
|
return true;
|
||||||
if (DKind == OMPD_distribute) {
|
|
||||||
Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
|
|
||||||
if (!Body)
|
|
||||||
return false;
|
|
||||||
ChildStmt = getSingleCompoundChild(Body);
|
|
||||||
if (!ChildStmt)
|
|
||||||
return false;
|
|
||||||
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
|
|
||||||
DKind = NND->getDirectiveKind();
|
|
||||||
return isOpenMPParallelDirective(DKind) &&
|
|
||||||
!hasParallelIfNumThreadsClause(Ctx, *NND);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
case OMPD_target_teams:
|
case OMPD_target_teams:
|
||||||
if (isOpenMPParallelDirective(DKind) &&
|
|
||||||
!hasParallelIfNumThreadsClause(Ctx, *NestedDir))
|
|
||||||
return true;
|
|
||||||
if (DKind == OMPD_distribute) {
|
|
||||||
Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
|
|
||||||
if (!Body)
|
|
||||||
return false;
|
|
||||||
ChildStmt = getSingleCompoundChild(Body);
|
|
||||||
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
|
|
||||||
DKind = NND->getDirectiveKind();
|
|
||||||
return isOpenMPParallelDirective(DKind) &&
|
return isOpenMPParallelDirective(DKind) &&
|
||||||
!hasParallelIfNumThreadsClause(Ctx, *NND);
|
!hasParallelIfNumThreadsClause(Ctx, *NestedDir) &&
|
||||||
}
|
!hasDistributeWithLastprivateClauses(*NestedDir);
|
||||||
}
|
|
||||||
return false;
|
|
||||||
case OMPD_target_teams_distribute:
|
|
||||||
return isOpenMPParallelDirective(DKind) &&
|
|
||||||
!hasParallelIfNumThreadsClause(Ctx, *NestedDir);
|
|
||||||
case OMPD_target_simd:
|
case OMPD_target_simd:
|
||||||
case OMPD_target_parallel:
|
case OMPD_target_parallel:
|
||||||
case OMPD_target_parallel_for:
|
case OMPD_target_parallel_for:
|
||||||
case OMPD_target_parallel_for_simd:
|
case OMPD_target_parallel_for_simd:
|
||||||
|
case OMPD_target_teams_distribute:
|
||||||
case OMPD_target_teams_distribute_simd:
|
case OMPD_target_teams_distribute_simd:
|
||||||
case OMPD_target_teams_distribute_parallel_for:
|
case OMPD_target_teams_distribute_parallel_for:
|
||||||
case OMPD_target_teams_distribute_parallel_for_simd:
|
case OMPD_target_teams_distribute_parallel_for_simd:
|
||||||
|
@ -794,15 +778,239 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx,
|
||||||
switch (DirectiveKind) {
|
switch (DirectiveKind) {
|
||||||
case OMPD_target:
|
case OMPD_target:
|
||||||
case OMPD_target_teams:
|
case OMPD_target_teams:
|
||||||
case OMPD_target_teams_distribute:
|
|
||||||
return hasNestedSPMDDirective(Ctx, D);
|
return hasNestedSPMDDirective(Ctx, D);
|
||||||
case OMPD_target_parallel:
|
case OMPD_target_parallel:
|
||||||
case OMPD_target_parallel_for:
|
case OMPD_target_parallel_for:
|
||||||
case OMPD_target_parallel_for_simd:
|
case OMPD_target_parallel_for_simd:
|
||||||
|
return !hasParallelIfNumThreadsClause(Ctx, D);
|
||||||
case OMPD_target_teams_distribute_parallel_for:
|
case OMPD_target_teams_distribute_parallel_for:
|
||||||
case OMPD_target_teams_distribute_parallel_for_simd:
|
case OMPD_target_teams_distribute_parallel_for_simd:
|
||||||
return !hasParallelIfNumThreadsClause(Ctx, D);
|
// Distribute with lastprivates requires non-SPMD execution mode.
|
||||||
|
return !hasParallelIfNumThreadsClause(Ctx, D) &&
|
||||||
|
!hasDistributeWithLastprivateClauses(D);
|
||||||
case OMPD_target_simd:
|
case OMPD_target_simd:
|
||||||
|
case OMPD_target_teams_distribute:
|
||||||
|
case OMPD_target_teams_distribute_simd:
|
||||||
|
return false;
|
||||||
|
case OMPD_parallel:
|
||||||
|
case OMPD_for:
|
||||||
|
case OMPD_parallel_for:
|
||||||
|
case OMPD_parallel_sections:
|
||||||
|
case OMPD_for_simd:
|
||||||
|
case OMPD_parallel_for_simd:
|
||||||
|
case OMPD_cancel:
|
||||||
|
case OMPD_cancellation_point:
|
||||||
|
case OMPD_ordered:
|
||||||
|
case OMPD_threadprivate:
|
||||||
|
case OMPD_task:
|
||||||
|
case OMPD_simd:
|
||||||
|
case OMPD_sections:
|
||||||
|
case OMPD_section:
|
||||||
|
case OMPD_single:
|
||||||
|
case OMPD_master:
|
||||||
|
case OMPD_critical:
|
||||||
|
case OMPD_taskyield:
|
||||||
|
case OMPD_barrier:
|
||||||
|
case OMPD_taskwait:
|
||||||
|
case OMPD_taskgroup:
|
||||||
|
case OMPD_atomic:
|
||||||
|
case OMPD_flush:
|
||||||
|
case OMPD_teams:
|
||||||
|
case OMPD_target_data:
|
||||||
|
case OMPD_target_exit_data:
|
||||||
|
case OMPD_target_enter_data:
|
||||||
|
case OMPD_distribute:
|
||||||
|
case OMPD_distribute_simd:
|
||||||
|
case OMPD_distribute_parallel_for:
|
||||||
|
case OMPD_distribute_parallel_for_simd:
|
||||||
|
case OMPD_teams_distribute:
|
||||||
|
case OMPD_teams_distribute_simd:
|
||||||
|
case OMPD_teams_distribute_parallel_for:
|
||||||
|
case OMPD_teams_distribute_parallel_for_simd:
|
||||||
|
case OMPD_target_update:
|
||||||
|
case OMPD_declare_simd:
|
||||||
|
case OMPD_declare_target:
|
||||||
|
case OMPD_end_declare_target:
|
||||||
|
case OMPD_declare_reduction:
|
||||||
|
case OMPD_taskloop:
|
||||||
|
case OMPD_taskloop_simd:
|
||||||
|
case OMPD_unknown:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
llvm_unreachable(
|
||||||
|
"Unknown programming model for OpenMP directive on NVPTX target.");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check if the directive is loops based and has schedule clause at all or has
|
||||||
|
/// static scheduling.
|
||||||
|
static bool hasStaticScheduling(const OMPExecutableDirective &D) {
|
||||||
|
assert(isOpenMPWorksharingDirective(D.getDirectiveKind()) &&
|
||||||
|
isOpenMPLoopDirective(D.getDirectiveKind()) &&
|
||||||
|
"Expected loop-based directive.");
|
||||||
|
return !D.hasClausesOfKind<OMPOrderedClause>() &&
|
||||||
|
(!D.hasClausesOfKind<OMPScheduleClause>() ||
|
||||||
|
llvm::any_of(D.getClausesOfKind<OMPScheduleClause>(),
|
||||||
|
[](const OMPScheduleClause *C) {
|
||||||
|
return C->getScheduleKind() == OMPC_SCHEDULE_static;
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check for inner (nested) lightweight runtime construct, if any
|
||||||
|
static bool hasNestedLightweightDirective(ASTContext &Ctx,
|
||||||
|
const OMPExecutableDirective &D) {
|
||||||
|
assert(supportsSPMDExecutionMode(Ctx, D) && "Expected SPMD mode directive.");
|
||||||
|
const auto *CS = D.getInnermostCapturedStmt();
|
||||||
|
const auto *Body =
|
||||||
|
CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
|
||||||
|
const Stmt *ChildStmt = getSingleCompoundChild(Body);
|
||||||
|
|
||||||
|
if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
|
||||||
|
OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
|
||||||
|
switch (D.getDirectiveKind()) {
|
||||||
|
case OMPD_target:
|
||||||
|
if (isOpenMPParallelDirective(DKind) &&
|
||||||
|
isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) &&
|
||||||
|
hasStaticScheduling(*NestedDir))
|
||||||
|
return true;
|
||||||
|
if (DKind == OMPD_parallel) {
|
||||||
|
Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
|
||||||
|
/*IgnoreCaptured=*/true);
|
||||||
|
if (!Body)
|
||||||
|
return false;
|
||||||
|
ChildStmt = getSingleCompoundChild(Body);
|
||||||
|
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
|
||||||
|
DKind = NND->getDirectiveKind();
|
||||||
|
if (isOpenMPWorksharingDirective(DKind) &&
|
||||||
|
isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
} else if (DKind == OMPD_teams) {
|
||||||
|
Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
|
||||||
|
/*IgnoreCaptured=*/true);
|
||||||
|
if (!Body)
|
||||||
|
return false;
|
||||||
|
ChildStmt = getSingleCompoundChild(Body);
|
||||||
|
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
|
||||||
|
DKind = NND->getDirectiveKind();
|
||||||
|
if (isOpenMPParallelDirective(DKind) &&
|
||||||
|
isOpenMPWorksharingDirective(DKind) &&
|
||||||
|
isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
|
||||||
|
return true;
|
||||||
|
if (DKind == OMPD_parallel) {
|
||||||
|
Body = NND->getInnermostCapturedStmt()->IgnoreContainers(
|
||||||
|
/*IgnoreCaptured=*/true);
|
||||||
|
if (!Body)
|
||||||
|
return false;
|
||||||
|
ChildStmt = getSingleCompoundChild(Body);
|
||||||
|
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
|
||||||
|
DKind = NND->getDirectiveKind();
|
||||||
|
if (isOpenMPWorksharingDirective(DKind) &&
|
||||||
|
isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
case OMPD_target_teams:
|
||||||
|
if (isOpenMPParallelDirective(DKind) &&
|
||||||
|
isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) &&
|
||||||
|
hasStaticScheduling(*NestedDir))
|
||||||
|
return true;
|
||||||
|
if (DKind == OMPD_parallel) {
|
||||||
|
Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
|
||||||
|
/*IgnoreCaptured=*/true);
|
||||||
|
if (!Body)
|
||||||
|
return false;
|
||||||
|
ChildStmt = getSingleCompoundChild(Body);
|
||||||
|
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
|
||||||
|
DKind = NND->getDirectiveKind();
|
||||||
|
if (isOpenMPWorksharingDirective(DKind) &&
|
||||||
|
isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
case OMPD_target_parallel:
|
||||||
|
return isOpenMPWorksharingDirective(DKind) &&
|
||||||
|
isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NestedDir);
|
||||||
|
case OMPD_target_teams_distribute:
|
||||||
|
case OMPD_target_simd:
|
||||||
|
case OMPD_target_parallel_for:
|
||||||
|
case OMPD_target_parallel_for_simd:
|
||||||
|
case OMPD_target_teams_distribute_simd:
|
||||||
|
case OMPD_target_teams_distribute_parallel_for:
|
||||||
|
case OMPD_target_teams_distribute_parallel_for_simd:
|
||||||
|
case OMPD_parallel:
|
||||||
|
case OMPD_for:
|
||||||
|
case OMPD_parallel_for:
|
||||||
|
case OMPD_parallel_sections:
|
||||||
|
case OMPD_for_simd:
|
||||||
|
case OMPD_parallel_for_simd:
|
||||||
|
case OMPD_cancel:
|
||||||
|
case OMPD_cancellation_point:
|
||||||
|
case OMPD_ordered:
|
||||||
|
case OMPD_threadprivate:
|
||||||
|
case OMPD_task:
|
||||||
|
case OMPD_simd:
|
||||||
|
case OMPD_sections:
|
||||||
|
case OMPD_section:
|
||||||
|
case OMPD_single:
|
||||||
|
case OMPD_master:
|
||||||
|
case OMPD_critical:
|
||||||
|
case OMPD_taskyield:
|
||||||
|
case OMPD_barrier:
|
||||||
|
case OMPD_taskwait:
|
||||||
|
case OMPD_taskgroup:
|
||||||
|
case OMPD_atomic:
|
||||||
|
case OMPD_flush:
|
||||||
|
case OMPD_teams:
|
||||||
|
case OMPD_target_data:
|
||||||
|
case OMPD_target_exit_data:
|
||||||
|
case OMPD_target_enter_data:
|
||||||
|
case OMPD_distribute:
|
||||||
|
case OMPD_distribute_simd:
|
||||||
|
case OMPD_distribute_parallel_for:
|
||||||
|
case OMPD_distribute_parallel_for_simd:
|
||||||
|
case OMPD_teams_distribute:
|
||||||
|
case OMPD_teams_distribute_simd:
|
||||||
|
case OMPD_teams_distribute_parallel_for:
|
||||||
|
case OMPD_teams_distribute_parallel_for_simd:
|
||||||
|
case OMPD_target_update:
|
||||||
|
case OMPD_declare_simd:
|
||||||
|
case OMPD_declare_target:
|
||||||
|
case OMPD_end_declare_target:
|
||||||
|
case OMPD_declare_reduction:
|
||||||
|
case OMPD_taskloop:
|
||||||
|
case OMPD_taskloop_simd:
|
||||||
|
case OMPD_unknown:
|
||||||
|
llvm_unreachable("Unexpected directive.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Checks if the construct supports lightweight runtime. It must be SPMD
|
||||||
|
/// construct + inner loop-based construct with static scheduling.
|
||||||
|
static bool supportsLightweightRuntime(ASTContext &Ctx,
|
||||||
|
const OMPExecutableDirective &D) {
|
||||||
|
if (!supportsSPMDExecutionMode(Ctx, D))
|
||||||
|
return false;
|
||||||
|
OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
|
||||||
|
switch (DirectiveKind) {
|
||||||
|
case OMPD_target:
|
||||||
|
case OMPD_target_teams:
|
||||||
|
case OMPD_target_parallel:
|
||||||
|
return hasNestedLightweightDirective(Ctx, D);
|
||||||
|
case OMPD_target_parallel_for:
|
||||||
|
case OMPD_target_parallel_for_simd:
|
||||||
|
case OMPD_target_teams_distribute_parallel_for:
|
||||||
|
case OMPD_target_teams_distribute_parallel_for_simd:
|
||||||
|
// (Last|First)-privates must be shared in parallel region.
|
||||||
|
return hasStaticScheduling(D);
|
||||||
|
case OMPD_target_simd:
|
||||||
|
case OMPD_target_teams_distribute:
|
||||||
case OMPD_target_teams_distribute_simd:
|
case OMPD_target_teams_distribute_simd:
|
||||||
return false;
|
return false;
|
||||||
case OMPD_parallel:
|
case OMPD_parallel:
|
||||||
|
@ -1010,18 +1218,20 @@ void CGOpenMPRuntimeNVPTX::emitSPMDEntryHeader(
|
||||||
EST.ExitBB = CGF.createBasicBlock(".exit");
|
EST.ExitBB = CGF.createBasicBlock(".exit");
|
||||||
|
|
||||||
// Initialize the OMP state in the runtime; called by all active threads.
|
// Initialize the OMP state in the runtime; called by all active threads.
|
||||||
// TODO: Set RequiresOMPRuntime and RequiresDataSharing parameters
|
bool RequiresFullRuntime = !supportsLightweightRuntime(CGF.getContext(), D);
|
||||||
// based on code analysis of the target region.
|
llvm::Value *Args[] = {
|
||||||
llvm::Value *Args[] = {getThreadLimit(CGF, /*IsInSPMDExecutionMode=*/true),
|
getThreadLimit(CGF, /*IsInSPMDExecutionMode=*/true),
|
||||||
/*RequiresOMPRuntime=*/Bld.getInt16(1),
|
/*RequiresOMPRuntime=*/
|
||||||
/*RequiresDataSharing=*/Bld.getInt16(1)};
|
Bld.getInt16(RequiresFullRuntime ? 1 : 0),
|
||||||
|
/*RequiresDataSharing=*/Bld.getInt16(RequiresFullRuntime ? 1 : 0)};
|
||||||
CGF.EmitRuntimeCall(
|
CGF.EmitRuntimeCall(
|
||||||
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_init), Args);
|
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_init), Args);
|
||||||
|
|
||||||
|
if (RequiresFullRuntime) {
|
||||||
// For data sharing, we need to initialize the stack.
|
// For data sharing, we need to initialize the stack.
|
||||||
CGF.EmitRuntimeCall(
|
CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
|
||||||
createNVPTXRuntimeFunction(
|
|
||||||
OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd));
|
OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd));
|
||||||
|
}
|
||||||
|
|
||||||
CGF.EmitBranch(ExecuteBB);
|
CGF.EmitBranch(ExecuteBB);
|
||||||
|
|
||||||
|
@ -1414,7 +1624,8 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
|
||||||
/// Build void __kmpc_data_sharing_init_stack_spmd();
|
/// Build void __kmpc_data_sharing_init_stack_spmd();
|
||||||
auto *FnTy =
|
auto *FnTy =
|
||||||
llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
|
llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
|
||||||
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack_spmd");
|
RTLFn =
|
||||||
|
CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack_spmd");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case OMPRTL_NVPTX__kmpc_data_sharing_push_stack: {
|
case OMPRTL_NVPTX__kmpc_data_sharing_push_stack: {
|
||||||
|
@ -1607,6 +1818,7 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction(
|
||||||
.emitGenericVarsEpilog(CGF);
|
.emitGenericVarsEpilog(CGF);
|
||||||
}
|
}
|
||||||
} Action(Loc);
|
} Action(Loc);
|
||||||
|
if (getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD)
|
||||||
CodeGen.setAction(Action);
|
CodeGen.setAction(Action);
|
||||||
llvm::Value *OutlinedFunVal = CGOpenMPRuntime::emitTeamsOutlinedFunction(
|
llvm::Value *OutlinedFunVal = CGOpenMPRuntime::emitTeamsOutlinedFunction(
|
||||||
D, ThreadIDVar, InnermostKind, CodeGen);
|
D, ThreadIDVar, InnermostKind, CodeGen);
|
||||||
|
@ -1640,19 +1852,61 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
|
||||||
unsigned GlobalRecordSize =
|
unsigned GlobalRecordSize =
|
||||||
CGM.getContext().getTypeSizeInChars(RecTy).getQuantity();
|
CGM.getContext().getTypeSizeInChars(RecTy).getQuantity();
|
||||||
GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment);
|
GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment);
|
||||||
|
|
||||||
|
llvm::Value *GlobalRecCastAddr;
|
||||||
|
if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown) {
|
||||||
|
llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit");
|
||||||
|
llvm::BasicBlock *SPMDBB = CGF.createBasicBlock(".spmd");
|
||||||
|
llvm::BasicBlock *NonSPMDBB = CGF.createBasicBlock(".non-spmd");
|
||||||
|
llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall(
|
||||||
|
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode)));
|
||||||
|
Bld.CreateCondBr(IsSPMD, SPMDBB, NonSPMDBB);
|
||||||
|
// There is no need to emit line number for unconditional branch.
|
||||||
|
(void)ApplyDebugLocation::CreateEmpty(CGF);
|
||||||
|
CGF.EmitBlock(SPMDBB);
|
||||||
|
Address RecPtr = CGF.CreateMemTemp(RecTy, "_local_stack");
|
||||||
|
CGF.EmitBranch(ExitBB);
|
||||||
|
// There is no need to emit line number for unconditional branch.
|
||||||
|
(void)ApplyDebugLocation::CreateEmpty(CGF);
|
||||||
|
CGF.EmitBlock(NonSPMDBB);
|
||||||
// TODO: allow the usage of shared memory to be controlled by
|
// TODO: allow the usage of shared memory to be controlled by
|
||||||
// the user, for now, default to global.
|
// the user, for now, default to global.
|
||||||
llvm::Value *GlobalRecordSizeArg[] = {
|
llvm::Value *GlobalRecordSizeArg[] = {
|
||||||
llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize),
|
llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize),
|
||||||
CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
|
CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
|
||||||
llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
|
llvm::Value *GlobalRecValue =
|
||||||
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_push_stack),
|
CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
|
||||||
|
OMPRTL_NVPTX__kmpc_data_sharing_push_stack),
|
||||||
GlobalRecordSizeArg);
|
GlobalRecordSizeArg);
|
||||||
llvm::Value *GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
|
GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
|
||||||
GlobalRecValue, CGF.ConvertTypeForMem(RecTy)->getPointerTo());
|
GlobalRecValue, CGF.ConvertTypeForMem(RecTy)->getPointerTo());
|
||||||
|
CGF.EmitBlock(ExitBB);
|
||||||
|
auto *Phi = Bld.CreatePHI(GlobalRecCastAddr->getType(),
|
||||||
|
/*NumReservedValues=*/2, "_select_stack");
|
||||||
|
Phi->addIncoming(RecPtr.getPointer(), SPMDBB);
|
||||||
|
Phi->addIncoming(GlobalRecCastAddr, NonSPMDBB);
|
||||||
|
GlobalRecCastAddr = Phi;
|
||||||
|
I->getSecond().GlobalRecordAddr = Phi;
|
||||||
|
I->getSecond().IsInSPMDModeFlag = IsSPMD;
|
||||||
|
} else {
|
||||||
|
assert(getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_NonSPMD &&
|
||||||
|
"Expected Non-SPMD construct.");
|
||||||
|
// TODO: allow the usage of shared memory to be controlled by
|
||||||
|
// the user, for now, default to global.
|
||||||
|
llvm::Value *GlobalRecordSizeArg[] = {
|
||||||
|
llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize),
|
||||||
|
CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
|
||||||
|
llvm::Value *GlobalRecValue =
|
||||||
|
CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
|
||||||
|
OMPRTL_NVPTX__kmpc_data_sharing_push_stack),
|
||||||
|
GlobalRecordSizeArg);
|
||||||
|
GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
|
||||||
|
GlobalRecValue, CGF.ConvertTypeForMem(RecTy)->getPointerTo());
|
||||||
|
I->getSecond().GlobalRecordAddr = GlobalRecValue;
|
||||||
|
I->getSecond().IsInSPMDModeFlag = nullptr;
|
||||||
|
}
|
||||||
LValue Base =
|
LValue Base =
|
||||||
CGF.MakeNaturalAlignPointeeAddrLValue(GlobalRecCastAddr, RecTy);
|
CGF.MakeNaturalAlignPointeeAddrLValue(GlobalRecCastAddr, RecTy);
|
||||||
I->getSecond().GlobalRecordAddr = GlobalRecValue;
|
|
||||||
|
|
||||||
// Emit the "global alloca" which is a GEP from the global declaration
|
// Emit the "global alloca" which is a GEP from the global declaration
|
||||||
// record using the pointer returned by the runtime.
|
// record using the pointer returned by the runtime.
|
||||||
|
@ -1724,11 +1978,28 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsEpilog(CodeGenFunction &CGF) {
|
||||||
Addr);
|
Addr);
|
||||||
}
|
}
|
||||||
if (I->getSecond().GlobalRecordAddr) {
|
if (I->getSecond().GlobalRecordAddr) {
|
||||||
|
if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown) {
|
||||||
|
CGBuilderTy &Bld = CGF.Builder;
|
||||||
|
llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit");
|
||||||
|
llvm::BasicBlock *NonSPMDBB = CGF.createBasicBlock(".non-spmd");
|
||||||
|
Bld.CreateCondBr(I->getSecond().IsInSPMDModeFlag, ExitBB, NonSPMDBB);
|
||||||
|
// There is no need to emit line number for unconditional branch.
|
||||||
|
(void)ApplyDebugLocation::CreateEmpty(CGF);
|
||||||
|
CGF.EmitBlock(NonSPMDBB);
|
||||||
CGF.EmitRuntimeCall(
|
CGF.EmitRuntimeCall(
|
||||||
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
|
createNVPTXRuntimeFunction(
|
||||||
|
OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
|
||||||
|
CGF.EmitCastToVoidPtr(I->getSecond().GlobalRecordAddr));
|
||||||
|
CGF.EmitBlock(ExitBB);
|
||||||
|
} else {
|
||||||
|
assert(getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_NonSPMD &&
|
||||||
|
"Expected Non-SPMD mode.");
|
||||||
|
CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
|
||||||
|
OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
|
||||||
I->getSecond().GlobalRecordAddr);
|
I->getSecond().GlobalRecordAddr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF,
|
void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF,
|
||||||
|
|
|
@ -374,6 +374,7 @@ private:
|
||||||
llvm::SmallVector<llvm::Value *, 4> EscapedVariableLengthDeclsAddrs;
|
llvm::SmallVector<llvm::Value *, 4> EscapedVariableLengthDeclsAddrs;
|
||||||
const RecordDecl *GlobalRecord = nullptr;
|
const RecordDecl *GlobalRecord = nullptr;
|
||||||
llvm::Value *GlobalRecordAddr = nullptr;
|
llvm::Value *GlobalRecordAddr = nullptr;
|
||||||
|
llvm::Value *IsInSPMDModeFlag = nullptr;
|
||||||
std::unique_ptr<CodeGenFunction::OMPMapVars> MappedParams;
|
std::unique_ptr<CodeGenFunction::OMPMapVars> MappedParams;
|
||||||
};
|
};
|
||||||
/// Maps the function to the list of the globalized variables with their
|
/// Maps the function to the list of the globalized variables with their
|
||||||
|
|
|
@ -35,9 +35,19 @@ int maini1() {
|
||||||
// CHECK-NOT: @__kmpc_data_sharing_push_stack
|
// CHECK-NOT: @__kmpc_data_sharing_push_stack
|
||||||
|
|
||||||
// CHECK: define {{.*}}[[BAR]]()
|
// CHECK: define {{.*}}[[BAR]]()
|
||||||
|
// CHECK: [[STACK:%.+]] = alloca [[GLOBAL_ST:%.+]],
|
||||||
|
// CHECK: [[RES:%.+]] = call i8 @__kmpc_is_spmd_exec_mode()
|
||||||
|
// CHECK: [[IS_SPMD:%.+]] = icmp ne i8 [[RES]], 0
|
||||||
|
// CHECK: br i1 [[IS_SPMD]], label
|
||||||
|
// CHECK: br label
|
||||||
// CHECK: [[RES:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i64 4, i16 0)
|
// CHECK: [[RES:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i64 4, i16 0)
|
||||||
// CHECK: [[GLOBALS:%.+]] = bitcast i8* [[RES]] to [[GLOBAL_ST:%struct[.].*]]*
|
// CHECK: [[GLOBALS:%.+]] = bitcast i8* [[RES]] to [[GLOBAL_ST]]*
|
||||||
// CHECK: [[A_ADDR:%.+]] = getelementptr inbounds [[GLOBAL_ST]], [[GLOBAL_ST]]* [[GLOBALS]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
|
// CHECK: br label
|
||||||
|
// CHECK: [[ITEMS:%.+]] = phi [[GLOBAL_ST]]* [ [[STACK]], {{.+}} ], [ [[GLOBALS]], {{.+}} ]
|
||||||
|
// CHECK: [[A_ADDR:%.+]] = getelementptr inbounds [[GLOBAL_ST]], [[GLOBAL_ST]]* [[ITEMS]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
|
||||||
// CHECK: call {{.*}}[[FOO]](i32* dereferenceable{{.*}} [[A_ADDR]])
|
// CHECK: call {{.*}}[[FOO]](i32* dereferenceable{{.*}} [[A_ADDR]])
|
||||||
// CHECK: call void @__kmpc_data_sharing_pop_stack(i8* [[RES]])
|
// CHECK: br i1 [[IS_SPMD]], label
|
||||||
|
// CHECK: [[BC:%.+]] = bitcast [[GLOBAL_ST]]* [[ITEMS]] to i8*
|
||||||
|
// CHECK: call void @__kmpc_data_sharing_pop_stack(i8* [[BC]])
|
||||||
|
// CHECK: br label
|
||||||
// CHECK: ret i32
|
// CHECK: ret i32
|
||||||
|
|
|
@ -0,0 +1,328 @@
|
||||||
|
// Test target codegen - host bc file has to be created first.
|
||||||
|
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
|
||||||
|
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
|
||||||
|
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
|
||||||
|
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s
|
||||||
|
// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s
|
||||||
|
// expected-no-diagnostics
|
||||||
|
#ifndef HEADER
|
||||||
|
#define HEADER
|
||||||
|
|
||||||
|
// CHECK-NOT: @__omp_offloading_{{.+}}_exec_mode = weak constant i8 1
|
||||||
|
// CHECK: @__omp_offloading_{{.+}}_l52_exec_mode = weak constant i8 1
|
||||||
|
// CHECK-NOT: @__omp_offloading_{{.+}}_exec_mode = weak constant i8 1
|
||||||
|
|
||||||
|
void foo() {
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
|
||||||
|
#pragma omp target teams distribute parallel for simd
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target teams distribute parallel for simd schedule(static)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target teams distribute parallel for simd schedule(static, 1)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target teams distribute parallel for simd schedule(auto)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target teams distribute parallel for simd schedule(runtime)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target teams distribute parallel for simd schedule(dynamic)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target teams distribute parallel for simd schedule(guided)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
int a;
|
||||||
|
// CHECK: call void @__kmpc_kernel_init(
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
|
||||||
|
#pragma omp target teams distribute parallel for lastprivate(a)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
a = i;
|
||||||
|
#pragma omp target teams distribute parallel for schedule(static)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target teams distribute parallel for schedule(static, 1)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target teams distribute parallel for schedule(auto)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target teams distribute parallel for schedule(runtime)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target teams distribute parallel for schedule(dynamic)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target teams distribute parallel for schedule(guided)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
|
||||||
|
#pragma omp target teams
|
||||||
|
#pragma omp distribute parallel for simd
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target teams
|
||||||
|
#pragma omp distribute parallel for simd schedule(static)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target teams
|
||||||
|
#pragma omp distribute parallel for simd schedule(static, 1)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target teams
|
||||||
|
#pragma omp distribute parallel for simd schedule(auto)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target teams
|
||||||
|
#pragma omp distribute parallel for simd schedule(runtime)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target teams
|
||||||
|
#pragma omp distribute parallel for simd schedule(dynamic)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target teams
|
||||||
|
#pragma omp distribute parallel for simd schedule(guided)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
|
||||||
|
#pragma omp target teams
|
||||||
|
#pragma omp distribute parallel for
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target teams
|
||||||
|
#pragma omp distribute parallel for schedule(static)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target teams
|
||||||
|
#pragma omp distribute parallel for schedule(static, 1)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target teams
|
||||||
|
#pragma omp distribute parallel for schedule(auto)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target teams
|
||||||
|
#pragma omp distribute parallel for schedule(runtime)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target teams
|
||||||
|
#pragma omp distribute parallel for schedule(dynamic)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target teams
|
||||||
|
#pragma omp distribute parallel for schedule(guided)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
|
||||||
|
#pragma omp target
|
||||||
|
#pragma omp teams
|
||||||
|
#pragma omp distribute parallel for
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target
|
||||||
|
#pragma omp teams
|
||||||
|
#pragma omp distribute parallel for schedule(static)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target
|
||||||
|
#pragma omp teams
|
||||||
|
#pragma omp distribute parallel for schedule(static, 1)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target
|
||||||
|
#pragma omp teams
|
||||||
|
#pragma omp distribute parallel for schedule(auto)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target
|
||||||
|
#pragma omp teams
|
||||||
|
#pragma omp distribute parallel for schedule(runtime)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target
|
||||||
|
#pragma omp teams
|
||||||
|
#pragma omp distribute parallel for schedule(dynamic)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target
|
||||||
|
#pragma omp teams
|
||||||
|
#pragma omp distribute parallel for schedule(guided)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
|
||||||
|
#pragma omp target parallel for
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target parallel for schedule(static)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target parallel for schedule(static, 1)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target parallel for schedule(auto)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target parallel for schedule(runtime)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target parallel for schedule(dynamic)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target parallel for schedule(guided)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
|
||||||
|
#pragma omp target parallel
|
||||||
|
#pragma omp for simd
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target parallel
|
||||||
|
#pragma omp for simd schedule(static)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target parallel
|
||||||
|
#pragma omp for simd schedule(static, 1)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target parallel
|
||||||
|
#pragma omp for simd schedule(auto)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target parallel
|
||||||
|
#pragma omp for simd schedule(runtime)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target parallel
|
||||||
|
#pragma omp for simd schedule(dynamic)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target parallel
|
||||||
|
#pragma omp for simd schedule(guided)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
|
||||||
|
#pragma omp target
|
||||||
|
#pragma omp parallel
|
||||||
|
#pragma omp for simd ordered
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target
|
||||||
|
#pragma omp parallel
|
||||||
|
#pragma omp for simd schedule(static)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target
|
||||||
|
#pragma omp parallel
|
||||||
|
#pragma omp for simd schedule(static, 1)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target
|
||||||
|
#pragma omp parallel
|
||||||
|
#pragma omp for simd schedule(auto)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target
|
||||||
|
#pragma omp parallel
|
||||||
|
#pragma omp for simd schedule(runtime)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target
|
||||||
|
#pragma omp parallel
|
||||||
|
#pragma omp for simd schedule(dynamic)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target
|
||||||
|
#pragma omp parallel
|
||||||
|
#pragma omp for simd schedule(guided)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
|
||||||
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
|
||||||
|
#pragma omp target
|
||||||
|
#pragma omp parallel for
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target
|
||||||
|
#pragma omp parallel for schedule(static)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target
|
||||||
|
#pragma omp parallel for schedule(static, 1)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target
|
||||||
|
#pragma omp parallel for schedule(auto)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target
|
||||||
|
#pragma omp parallel for schedule(runtime)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target
|
||||||
|
#pragma omp parallel for schedule(dynamic)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
#pragma omp target
|
||||||
|
#pragma omp parallel for schedule(guided)
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
|
@ -554,13 +554,20 @@ int baz(int f, double &a) {
|
||||||
// CHECK: ret void
|
// CHECK: ret void
|
||||||
|
|
||||||
// CHECK: define i32 [[BAZ]](i32 [[F:%.*]], double* dereferenceable{{.*}})
|
// CHECK: define i32 [[BAZ]](i32 [[F:%.*]], double* dereferenceable{{.*}})
|
||||||
|
// CHECK: [[STACK:%.+]] = alloca [[GLOBAL_ST:%.+]],
|
||||||
// CHECK: [[ZERO_ADDR:%.+]] = alloca i32,
|
// CHECK: [[ZERO_ADDR:%.+]] = alloca i32,
|
||||||
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t*
|
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t*
|
||||||
// CHECK: [[GTID_ADDR:%.+]] = alloca i32,
|
// CHECK: [[GTID_ADDR:%.+]] = alloca i32,
|
||||||
// CHECK: store i32 0, i32* [[ZERO_ADDR]]
|
// CHECK: store i32 0, i32* [[ZERO_ADDR]]
|
||||||
|
// CHECK: [[RES:%.+]] = call i8 @__kmpc_is_spmd_exec_mode()
|
||||||
|
// CHECK: [[IS_SPMD:%.+]] = icmp ne i8 [[RES]], 0
|
||||||
|
// CHECK: br i1 [[IS_SPMD]], label
|
||||||
|
// CHECK: br label
|
||||||
// CHECK: [[PTR:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{64|32}} 4, i16 0)
|
// CHECK: [[PTR:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{64|32}} 4, i16 0)
|
||||||
// CHECK: [[REC_ADDR:%.+]] = bitcast i8* [[PTR]] to %struct._globalized_locals_ty*
|
// CHECK: [[REC_ADDR:%.+]] = bitcast i8* [[PTR]] to [[GLOBAL_ST]]*
|
||||||
// CHECK: [[F_PTR:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[REC_ADDR]], i32 0, i32 0
|
// CHECK: br label
|
||||||
|
// CHECK: [[ITEMS:%.+]] = phi [[GLOBAL_ST]]* [ [[STACK]], {{.+}} ], [ [[REC_ADDR]], {{.+}} ]
|
||||||
|
// CHECK: [[F_PTR:%.+]] = getelementptr inbounds [[GLOBAL_ST]], [[GLOBAL_ST]]* [[ITEMS]], i32 0, i32 0
|
||||||
// CHECK: store i32 %{{.+}}, i32* [[F_PTR]],
|
// CHECK: store i32 %{{.+}}, i32* [[F_PTR]],
|
||||||
|
|
||||||
// CHECK: [[RES:%.+]] = call i8 @__kmpc_is_spmd_exec_mode()
|
// CHECK: [[RES:%.+]] = call i8 @__kmpc_is_spmd_exec_mode()
|
||||||
|
@ -595,7 +602,12 @@ int baz(int f, double &a) {
|
||||||
// CHECK: br label
|
// CHECK: br label
|
||||||
|
|
||||||
// CHECK: [[RES:%.+]] = load i32, i32* [[F_PTR]],
|
// CHECK: [[RES:%.+]] = load i32, i32* [[F_PTR]],
|
||||||
// CHECK: call void @__kmpc_data_sharing_pop_stack(i8* [[PTR]])
|
// CHECK: store i32 [[RES]], i32* [[RET:%.+]],
|
||||||
|
// CHECK: br i1 [[IS_SPMD]], label
|
||||||
|
// CHECK: [[BC:%.+]] = bitcast [[GLOBAL_ST]]* [[ITEMS]] to i8*
|
||||||
|
// CHECK: call void @__kmpc_data_sharing_pop_stack(i8* [[BC]])
|
||||||
|
// CHECK: br label
|
||||||
|
// CHECK: [[RES:%.+]] = load i32, i32* [[RET]],
|
||||||
// CHECK: ret i32 [[RES]]
|
// CHECK: ret i32 [[RES]]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -59,7 +59,7 @@ int bar(int n){
|
||||||
// CHECK: store i16* {{%.+}}, i16** [[AA_ADDR]], align
|
// CHECK: store i16* {{%.+}}, i16** [[AA_ADDR]], align
|
||||||
// CHECK: [[AA:%.+]] = load i16*, i16** [[AA_ADDR]], align
|
// CHECK: [[AA:%.+]] = load i16*, i16** [[AA_ADDR]], align
|
||||||
// CHECK: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
|
// CHECK: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
|
||||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 1, i16 1)
|
||||||
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
|
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
|
||||||
// CHECK: br label {{%?}}[[EXEC:.+]]
|
// CHECK: br label {{%?}}[[EXEC:.+]]
|
||||||
//
|
//
|
||||||
|
@ -102,7 +102,7 @@ int bar(int n){
|
||||||
// CHECK: [[AA:%.+]] = load i16*, i16** [[AA_ADDR]], align
|
// CHECK: [[AA:%.+]] = load i16*, i16** [[AA_ADDR]], align
|
||||||
// CHECK: [[B:%.+]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align
|
// CHECK: [[B:%.+]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align
|
||||||
// CHECK: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
|
// CHECK: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
|
||||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 1, i16 1)
|
||||||
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
|
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
|
||||||
// CHECK: br label {{%?}}[[EXEC:.+]]
|
// CHECK: br label {{%?}}[[EXEC:.+]]
|
||||||
//
|
//
|
||||||
|
|
|
@ -47,7 +47,7 @@ int bar(int n){
|
||||||
}
|
}
|
||||||
|
|
||||||
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l22}}(
|
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l22}}(
|
||||||
// CHECK: call void @__kmpc_spmd_kernel_init(
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1)
|
||||||
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
|
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
|
||||||
// CHECK: br label {{%?}}[[EXEC:.+]]
|
// CHECK: br label {{%?}}[[EXEC:.+]]
|
||||||
//
|
//
|
||||||
|
@ -69,7 +69,7 @@ int bar(int n){
|
||||||
|
|
||||||
|
|
||||||
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l26}}(
|
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l26}}(
|
||||||
// CHECK: call void @__kmpc_spmd_kernel_init(
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1)
|
||||||
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
|
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
|
||||||
// CHECK: br label {{%?}}[[EXEC:.+]]
|
// CHECK: br label {{%?}}[[EXEC:.+]]
|
||||||
//
|
//
|
||||||
|
@ -90,7 +90,7 @@ int bar(int n){
|
||||||
|
|
||||||
|
|
||||||
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l31}}(
|
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l31}}(
|
||||||
// CHECK: call void @__kmpc_spmd_kernel_init(
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1)
|
||||||
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
|
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
|
||||||
// CHECK: br label {{%?}}[[EXEC:.+]]
|
// CHECK: br label {{%?}}[[EXEC:.+]]
|
||||||
//
|
//
|
||||||
|
|
|
@ -54,7 +54,7 @@ int bar(int n){
|
||||||
|
|
||||||
// CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l27}}(
|
// CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l27}}(
|
||||||
//
|
//
|
||||||
// CHECK: call void @__kmpc_spmd_kernel_init(
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1)
|
||||||
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
|
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
|
||||||
// CHECK: br label {{%?}}[[EXECUTE:.+]]
|
// CHECK: br label {{%?}}[[EXECUTE:.+]]
|
||||||
//
|
//
|
||||||
|
@ -242,7 +242,7 @@ int bar(int n){
|
||||||
|
|
||||||
// CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l32}}(
|
// CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l32}}(
|
||||||
//
|
//
|
||||||
// CHECK: call void @__kmpc_spmd_kernel_init(
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1)
|
||||||
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
|
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
|
||||||
// CHECK: br label {{%?}}[[EXECUTE:.+]]
|
// CHECK: br label {{%?}}[[EXECUTE:.+]]
|
||||||
//
|
//
|
||||||
|
@ -520,7 +520,7 @@ int bar(int n){
|
||||||
|
|
||||||
// CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l38}}(
|
// CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l38}}(
|
||||||
//
|
//
|
||||||
// CHECK: call void @__kmpc_spmd_kernel_init(
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1)
|
||||||
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
|
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
|
||||||
// CHECK: br label {{%?}}[[EXECUTE:.+]]
|
// CHECK: br label {{%?}}[[EXECUTE:.+]]
|
||||||
//
|
//
|
||||||
|
|
|
@ -227,13 +227,13 @@ int bar(int n){
|
||||||
// CHECK: ret void
|
// CHECK: ret void
|
||||||
|
|
||||||
// CHECK: define weak void @__omp_offloading_{{.*}}ftemplate{{.*}}_l37(
|
// CHECK: define weak void @__omp_offloading_{{.*}}ftemplate{{.*}}_l37(
|
||||||
// CHECK: call void @__kmpc_spmd_kernel_init(
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1)
|
||||||
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
|
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
|
||||||
// CHECK: call i8* @__kmpc_data_sharing_push_stack(
|
// CHECK-NOT: call i8* @__kmpc_data_sharing_push_stack(
|
||||||
// CHECK-NOT: call void @__kmpc_serialized_parallel(
|
// CHECK-NOT: call void @__kmpc_serialized_parallel(
|
||||||
// CHECK: call void [[L0:@.+]](i32* %{{.+}}, i32* %{{.+}}, i16* %{{.*}})
|
// CHECK: call void [[L0:@.+]](i32* %{{.+}}, i32* %{{.+}}, i16* %{{.*}})
|
||||||
// CHECK-NOT: call void @__kmpc_end_serialized_parallel(
|
// CHECK-NOT: call void @__kmpc_end_serialized_parallel(
|
||||||
// CHECK: call void @__kmpc_data_sharing_pop_stack(
|
// CHECK-NOT: call void @__kmpc_data_sharing_pop_stack(
|
||||||
// CHECK: call void @__kmpc_spmd_kernel_deinit()
|
// CHECK: call void @__kmpc_spmd_kernel_deinit()
|
||||||
// CHECK: ret
|
// CHECK: ret
|
||||||
|
|
||||||
|
|
|
@ -8,12 +8,13 @@
|
||||||
#ifndef HEADER
|
#ifndef HEADER
|
||||||
#define HEADER
|
#define HEADER
|
||||||
|
|
||||||
// Check that the execution mode of all 2 target regions on the gpu is set to SPMD Mode.
|
// Check that the execution mode of the target region with lastprivates on the gpu is set to Non-SPMD Mode.
|
||||||
// CHECK-DAG: {{@__omp_offloading_.+l32}}_exec_mode = weak constant i8 0
|
// CHECK-DAG: {{@__omp_offloading_.+l33}}_exec_mode = weak constant i8 1
|
||||||
// CHECK-DAG: {{@__omp_offloading_.+l38}}_exec_mode = weak constant i8 0
|
// Check that the execution mode of all 4 target regions on the gpu is set to SPMD Mode.
|
||||||
// CHECK-DAG: {{@__omp_offloading_.+l43}}_exec_mode = weak constant i8 0
|
// CHECK-DAG: {{@__omp_offloading_.+l39}}_exec_mode = weak constant i8 0
|
||||||
// CHECK-DAG: {{@__omp_offloading_.+l48}}_exec_mode = weak constant i8 0
|
// CHECK-DAG: {{@__omp_offloading_.+l44}}_exec_mode = weak constant i8 0
|
||||||
// CHECK-DAG: {{@__omp_offloading_.+l56}}_exec_mode = weak constant i8 0
|
// CHECK-DAG: {{@__omp_offloading_.+l49}}_exec_mode = weak constant i8 0
|
||||||
|
// CHECK-DAG: {{@__omp_offloading_.+l57}}_exec_mode = weak constant i8 0
|
||||||
|
|
||||||
#define N 1000
|
#define N 1000
|
||||||
#define M 10
|
#define M 10
|
||||||
|
@ -67,14 +68,14 @@ int bar(int n){
|
||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
|
|
||||||
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
|
// CHECK_LABEL: define internal void @__omp_offloading_{{.+}}_l33_worker()
|
||||||
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
|
|
||||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
|
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l33(
|
||||||
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
|
// CHECK: call void @__kmpc_kernel_init(i32 %{{.+}}, i16 1)
|
||||||
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
|
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
|
||||||
// CHECK: {{call|invoke}} void [[OUTL1:@.+]](
|
// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[OUTL1:@__omp_outlined.*]]_wrapper to i8*), i16 1)
|
||||||
// CHECK: call void @__kmpc_for_static_fini(
|
// CHECK: call void @__kmpc_for_static_fini(
|
||||||
// CHECK: call void @__kmpc_spmd_kernel_deinit()
|
// CHECK: call void @__kmpc_kernel_deinit(i16 1)
|
||||||
// CHECK: ret void
|
// CHECK: ret void
|
||||||
|
|
||||||
// CHECK: define internal void [[OUTL1]](
|
// CHECK: define internal void [[OUTL1]](
|
||||||
|
@ -84,8 +85,7 @@ int bar(int n){
|
||||||
|
|
||||||
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
|
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
|
||||||
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
|
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
|
||||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
|
||||||
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
|
|
||||||
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
|
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
|
||||||
// CHECK: {{call|invoke}} void [[OUTL2:@.+]](
|
// CHECK: {{call|invoke}} void [[OUTL2:@.+]](
|
||||||
// CHECK: call void @__kmpc_for_static_fini(
|
// CHECK: call void @__kmpc_for_static_fini(
|
||||||
|
@ -99,8 +99,7 @@ int bar(int n){
|
||||||
|
|
||||||
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
|
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
|
||||||
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
|
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
|
||||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
|
||||||
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
|
|
||||||
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
|
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
|
||||||
// CHECK: {{call|invoke}} void [[OUTL3:@.+]](
|
// CHECK: {{call|invoke}} void [[OUTL3:@.+]](
|
||||||
// CHECK: call void @__kmpc_for_static_fini(
|
// CHECK: call void @__kmpc_for_static_fini(
|
||||||
|
@ -115,8 +114,7 @@ int bar(int n){
|
||||||
// CHECK: define {{.*}}void {{@__omp_offloading_.+}}({{.+}}, i{{32|64}} [[F_IN:%.+]])
|
// CHECK: define {{.*}}void {{@__omp_offloading_.+}}({{.+}}, i{{32|64}} [[F_IN:%.+]])
|
||||||
// CHECK: store {{.+}} [[F_IN]], {{.+}}* {{.+}},
|
// CHECK: store {{.+}} [[F_IN]], {{.+}}* {{.+}},
|
||||||
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
|
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
|
||||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
|
||||||
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
|
|
||||||
// CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align
|
// CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align
|
||||||
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]],
|
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]],
|
||||||
// CHECK: {{call|invoke}} void [[OUTL4:@.+]](
|
// CHECK: {{call|invoke}} void [[OUTL4:@.+]](
|
||||||
|
@ -129,7 +127,7 @@ int bar(int n){
|
||||||
// CHECK: call void @__kmpc_for_static_fini(
|
// CHECK: call void @__kmpc_for_static_fini(
|
||||||
// CHECK: ret void
|
// CHECK: ret void
|
||||||
|
|
||||||
// CHECK: define weak void @__omp_offloading_{{.*}}_l56(i[[SZ:64|32]] %{{[^,]+}}, [1000 x i32]* dereferenceable{{.*}}, i32* %{{[^)]+}})
|
// CHECK: define weak void @__omp_offloading_{{.*}}_l57(i[[SZ:64|32]] %{{[^,]+}}, [1000 x i32]* dereferenceable{{.*}}, i32* %{{[^)]+}})
|
||||||
// CHECK: call void [[OUTLINED:@__omp_outlined.*]](i32* %{{.+}}, i32* %{{.+}}, i[[SZ]] %{{.*}}, i[[SZ]] %{{.*}}, i[[SZ]] %{{.*}}, [1000 x i32]* %{{.*}}, i32* %{{.*}})
|
// CHECK: call void [[OUTLINED:@__omp_outlined.*]](i32* %{{.+}}, i32* %{{.+}}, i[[SZ]] %{{.*}}, i[[SZ]] %{{.*}}, i[[SZ]] %{{.*}}, [1000 x i32]* %{{.*}}, i32* %{{.*}})
|
||||||
// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.*}}, i32* noalias %{{.*}} i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [1000 x i32]* dereferenceable{{.*}}, i32* %{{.*}})
|
// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.*}}, i32* noalias %{{.*}} i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [1000 x i32]* dereferenceable{{.*}}, i32* %{{.*}})
|
||||||
|
|
||||||
|
|
|
@ -8,11 +8,12 @@
|
||||||
#ifndef HEADER
|
#ifndef HEADER
|
||||||
#define HEADER
|
#define HEADER
|
||||||
|
|
||||||
// Check that the execution mode of all 2 target regions on the gpu is set to SPMD Mode.
|
// Check that the execution mode of the target region with lastprivates on the gpu is set to Non-SPMD Mode.
|
||||||
// CHECK-DAG: {{@__omp_offloading_.+l30}}_exec_mode = weak constant i8 0
|
// CHECK-DAG: {{@__omp_offloading_.+l31}}_exec_mode = weak constant i8 1
|
||||||
// CHECK-DAG: {{@__omp_offloading_.+l36}}_exec_mode = weak constant i8 0
|
// Check that the execution mode of all 3 target regions on the gpu is set to SPMD Mode.
|
||||||
// CHECK-DAG: {{@__omp_offloading_.+l41}}_exec_mode = weak constant i8 0
|
// CHECK-DAG: {{@__omp_offloading_.+l37}}_exec_mode = weak constant i8 0
|
||||||
// CHECK-DAG: {{@__omp_offloading_.+l46}}_exec_mode = weak constant i8 0
|
// CHECK-DAG: {{@__omp_offloading_.+l42}}_exec_mode = weak constant i8 0
|
||||||
|
// CHECK-DAG: {{@__omp_offloading_.+l47}}_exec_mode = weak constant i8 0
|
||||||
|
|
||||||
#define N 1000
|
#define N 1000
|
||||||
#define M 10
|
#define M 10
|
||||||
|
@ -62,14 +63,14 @@ int bar(int n){
|
||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
|
|
||||||
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
|
// CHECK_LABEL: define internal void @__omp_offloading_{{.+}}_l31_worker()
|
||||||
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
|
|
||||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
|
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l31(
|
||||||
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
|
// CHECK: call void @__kmpc_kernel_init(i32 %{{.+}}, i16 1)
|
||||||
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
|
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
|
||||||
// CHECK: {{call|invoke}} void [[OUTL1:@.+]](
|
// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[OUTL1:@__omp_outlined.*]]_wrapper to i8*), i16 1)
|
||||||
// CHECK: call void @__kmpc_for_static_fini(
|
// CHECK: call void @__kmpc_for_static_fini(
|
||||||
// CHECK: call void @__kmpc_spmd_kernel_deinit()
|
// CHECK: call void @__kmpc_kernel_deinit(i16 1)
|
||||||
// CHECK: ret void
|
// CHECK: ret void
|
||||||
|
|
||||||
// CHECK: define internal void [[OUTL1]](
|
// CHECK: define internal void [[OUTL1]](
|
||||||
|
@ -79,8 +80,7 @@ int bar(int n){
|
||||||
|
|
||||||
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
|
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
|
||||||
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
|
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
|
||||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
|
||||||
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
|
|
||||||
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
|
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
|
||||||
// CHECK: {{call|invoke}} void [[OUTL2:@.+]](
|
// CHECK: {{call|invoke}} void [[OUTL2:@.+]](
|
||||||
// CHECK: call void @__kmpc_for_static_fini(
|
// CHECK: call void @__kmpc_for_static_fini(
|
||||||
|
@ -94,8 +94,7 @@ int bar(int n){
|
||||||
|
|
||||||
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
|
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
|
||||||
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
|
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
|
||||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
|
||||||
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
|
|
||||||
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
|
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
|
||||||
// CHECK: {{call|invoke}} void [[OUTL3:@.+]](
|
// CHECK: {{call|invoke}} void [[OUTL3:@.+]](
|
||||||
// CHECK: call void @__kmpc_for_static_fini(
|
// CHECK: call void @__kmpc_for_static_fini(
|
||||||
|
@ -110,8 +109,7 @@ int bar(int n){
|
||||||
// CHECK: define {{.*}}void {{@__omp_offloading_.+}}({{.+}}, i{{32|64}} [[F_IN:%.+]])
|
// CHECK: define {{.*}}void {{@__omp_offloading_.+}}({{.+}}, i{{32|64}} [[F_IN:%.+]])
|
||||||
// CHECK: store {{.+}} [[F_IN]], {{.+}}* {{.+}},
|
// CHECK: store {{.+}} [[F_IN]], {{.+}}* {{.+}},
|
||||||
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
|
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
|
||||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
|
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
|
||||||
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
|
|
||||||
// CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align
|
// CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align
|
||||||
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]],
|
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]],
|
||||||
// CHECK: {{call|invoke}} void [[OUTL4:@.+]](
|
// CHECK: {{call|invoke}} void [[OUTL4:@.+]](
|
||||||
|
|
Loading…
Reference in New Issue