[OPENMP][NVPTX] Add support for lightweight runtime.

If the target construct can be executed in SPMD mode + it is a loop
based directive with static scheduling, we can use lightweight runtime
support.

llvm-svn: 340953
This commit is contained in:
Alexey Bataev 2018-08-29 18:32:21 +00:00
parent 0f29e953b7
commit 8d8e1235ab
11 changed files with 733 additions and 115 deletions

View File

@ -672,11 +672,19 @@ static bool hasParallelIfNumThreadsClause(ASTContext &Ctx,
return false; return false;
} }
/// Checks if the directive is the distribute clause with the lastprivate
/// clauses. This construct does not support SPMD execution mode.
static bool hasDistributeWithLastprivateClauses(const OMPExecutableDirective &D) {
return isOpenMPDistributeDirective(D.getDirectiveKind()) &&
D.hasClausesOfKind<OMPLastprivateClause>();
}
/// Check for inner (nested) SPMD construct, if any /// Check for inner (nested) SPMD construct, if any
static bool hasNestedSPMDDirective(ASTContext &Ctx, static bool hasNestedSPMDDirective(ASTContext &Ctx,
const OMPExecutableDirective &D) { const OMPExecutableDirective &D) {
const auto *CS = D.getInnermostCapturedStmt(); const auto *CS = D.getInnermostCapturedStmt();
const auto *Body = CS->getCapturedStmt()->IgnoreContainers(); const auto *Body =
CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
const Stmt *ChildStmt = getSingleCompoundChild(Body); const Stmt *ChildStmt = getSingleCompoundChild(Body);
if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) { if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
@ -684,57 +692,33 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
switch (D.getDirectiveKind()) { switch (D.getDirectiveKind()) {
case OMPD_target: case OMPD_target:
if (isOpenMPParallelDirective(DKind) && if (isOpenMPParallelDirective(DKind) &&
!hasParallelIfNumThreadsClause(Ctx, *NestedDir)) !hasParallelIfNumThreadsClause(Ctx, *NestedDir) &&
!hasDistributeWithLastprivateClauses(*NestedDir))
return true; return true;
if (DKind == OMPD_teams || DKind == OMPD_teams_distribute) { if (DKind == OMPD_teams) {
Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(); Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
/*IgnoreCaptured=*/true);
if (!Body) if (!Body)
return false; return false;
ChildStmt = getSingleCompoundChild(Body); ChildStmt = getSingleCompoundChild(Body);
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) { if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
DKind = NND->getDirectiveKind(); DKind = NND->getDirectiveKind();
if (isOpenMPParallelDirective(DKind) && if (isOpenMPParallelDirective(DKind) &&
!hasParallelIfNumThreadsClause(Ctx, *NND)) !hasParallelIfNumThreadsClause(Ctx, *NND) &&
!hasDistributeWithLastprivateClauses(*NND))
return true; return true;
if (DKind == OMPD_distribute) {
Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
if (!Body)
return false;
ChildStmt = getSingleCompoundChild(Body);
if (!ChildStmt)
return false;
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
DKind = NND->getDirectiveKind();
return isOpenMPParallelDirective(DKind) &&
!hasParallelIfNumThreadsClause(Ctx, *NND);
}
}
} }
} }
return false; return false;
case OMPD_target_teams: case OMPD_target_teams:
if (isOpenMPParallelDirective(DKind) &&
!hasParallelIfNumThreadsClause(Ctx, *NestedDir))
return true;
if (DKind == OMPD_distribute) {
Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
if (!Body)
return false;
ChildStmt = getSingleCompoundChild(Body);
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
DKind = NND->getDirectiveKind();
return isOpenMPParallelDirective(DKind) && return isOpenMPParallelDirective(DKind) &&
!hasParallelIfNumThreadsClause(Ctx, *NND); !hasParallelIfNumThreadsClause(Ctx, *NestedDir) &&
} !hasDistributeWithLastprivateClauses(*NestedDir);
}
return false;
case OMPD_target_teams_distribute:
return isOpenMPParallelDirective(DKind) &&
!hasParallelIfNumThreadsClause(Ctx, *NestedDir);
case OMPD_target_simd: case OMPD_target_simd:
case OMPD_target_parallel: case OMPD_target_parallel:
case OMPD_target_parallel_for: case OMPD_target_parallel_for:
case OMPD_target_parallel_for_simd: case OMPD_target_parallel_for_simd:
case OMPD_target_teams_distribute:
case OMPD_target_teams_distribute_simd: case OMPD_target_teams_distribute_simd:
case OMPD_target_teams_distribute_parallel_for: case OMPD_target_teams_distribute_parallel_for:
case OMPD_target_teams_distribute_parallel_for_simd: case OMPD_target_teams_distribute_parallel_for_simd:
@ -794,15 +778,239 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx,
switch (DirectiveKind) { switch (DirectiveKind) {
case OMPD_target: case OMPD_target:
case OMPD_target_teams: case OMPD_target_teams:
case OMPD_target_teams_distribute:
return hasNestedSPMDDirective(Ctx, D); return hasNestedSPMDDirective(Ctx, D);
case OMPD_target_parallel: case OMPD_target_parallel:
case OMPD_target_parallel_for: case OMPD_target_parallel_for:
case OMPD_target_parallel_for_simd: case OMPD_target_parallel_for_simd:
return !hasParallelIfNumThreadsClause(Ctx, D);
case OMPD_target_teams_distribute_parallel_for: case OMPD_target_teams_distribute_parallel_for:
case OMPD_target_teams_distribute_parallel_for_simd: case OMPD_target_teams_distribute_parallel_for_simd:
return !hasParallelIfNumThreadsClause(Ctx, D); // Distribute with lastprivates requires non-SPMD execution mode.
return !hasParallelIfNumThreadsClause(Ctx, D) &&
!hasDistributeWithLastprivateClauses(D);
case OMPD_target_simd: case OMPD_target_simd:
case OMPD_target_teams_distribute:
case OMPD_target_teams_distribute_simd:
return false;
case OMPD_parallel:
case OMPD_for:
case OMPD_parallel_for:
case OMPD_parallel_sections:
case OMPD_for_simd:
case OMPD_parallel_for_simd:
case OMPD_cancel:
case OMPD_cancellation_point:
case OMPD_ordered:
case OMPD_threadprivate:
case OMPD_task:
case OMPD_simd:
case OMPD_sections:
case OMPD_section:
case OMPD_single:
case OMPD_master:
case OMPD_critical:
case OMPD_taskyield:
case OMPD_barrier:
case OMPD_taskwait:
case OMPD_taskgroup:
case OMPD_atomic:
case OMPD_flush:
case OMPD_teams:
case OMPD_target_data:
case OMPD_target_exit_data:
case OMPD_target_enter_data:
case OMPD_distribute:
case OMPD_distribute_simd:
case OMPD_distribute_parallel_for:
case OMPD_distribute_parallel_for_simd:
case OMPD_teams_distribute:
case OMPD_teams_distribute_simd:
case OMPD_teams_distribute_parallel_for:
case OMPD_teams_distribute_parallel_for_simd:
case OMPD_target_update:
case OMPD_declare_simd:
case OMPD_declare_target:
case OMPD_end_declare_target:
case OMPD_declare_reduction:
case OMPD_taskloop:
case OMPD_taskloop_simd:
case OMPD_unknown:
break;
}
llvm_unreachable(
"Unknown programming model for OpenMP directive on NVPTX target.");
}
/// Check if the directive is loops based and has schedule clause at all or has
/// static scheduling.
static bool hasStaticScheduling(const OMPExecutableDirective &D) {
assert(isOpenMPWorksharingDirective(D.getDirectiveKind()) &&
isOpenMPLoopDirective(D.getDirectiveKind()) &&
"Expected loop-based directive.");
return !D.hasClausesOfKind<OMPOrderedClause>() &&
(!D.hasClausesOfKind<OMPScheduleClause>() ||
llvm::any_of(D.getClausesOfKind<OMPScheduleClause>(),
[](const OMPScheduleClause *C) {
return C->getScheduleKind() == OMPC_SCHEDULE_static;
}));
}
/// Check for inner (nested) lightweight runtime construct, if any
static bool hasNestedLightweightDirective(ASTContext &Ctx,
const OMPExecutableDirective &D) {
assert(supportsSPMDExecutionMode(Ctx, D) && "Expected SPMD mode directive.");
const auto *CS = D.getInnermostCapturedStmt();
const auto *Body =
CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
const Stmt *ChildStmt = getSingleCompoundChild(Body);
if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
switch (D.getDirectiveKind()) {
case OMPD_target:
if (isOpenMPParallelDirective(DKind) &&
isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) &&
hasStaticScheduling(*NestedDir))
return true;
if (DKind == OMPD_parallel) {
Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
/*IgnoreCaptured=*/true);
if (!Body)
return false;
ChildStmt = getSingleCompoundChild(Body);
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
DKind = NND->getDirectiveKind();
if (isOpenMPWorksharingDirective(DKind) &&
isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
return true;
}
} else if (DKind == OMPD_teams) {
Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
/*IgnoreCaptured=*/true);
if (!Body)
return false;
ChildStmt = getSingleCompoundChild(Body);
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
DKind = NND->getDirectiveKind();
if (isOpenMPParallelDirective(DKind) &&
isOpenMPWorksharingDirective(DKind) &&
isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
return true;
if (DKind == OMPD_parallel) {
Body = NND->getInnermostCapturedStmt()->IgnoreContainers(
/*IgnoreCaptured=*/true);
if (!Body)
return false;
ChildStmt = getSingleCompoundChild(Body);
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
DKind = NND->getDirectiveKind();
if (isOpenMPWorksharingDirective(DKind) &&
isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
return true;
}
}
}
}
return false;
case OMPD_target_teams:
if (isOpenMPParallelDirective(DKind) &&
isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) &&
hasStaticScheduling(*NestedDir))
return true;
if (DKind == OMPD_parallel) {
Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
/*IgnoreCaptured=*/true);
if (!Body)
return false;
ChildStmt = getSingleCompoundChild(Body);
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
DKind = NND->getDirectiveKind();
if (isOpenMPWorksharingDirective(DKind) &&
isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
return true;
}
}
return false;
case OMPD_target_parallel:
return isOpenMPWorksharingDirective(DKind) &&
isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NestedDir);
case OMPD_target_teams_distribute:
case OMPD_target_simd:
case OMPD_target_parallel_for:
case OMPD_target_parallel_for_simd:
case OMPD_target_teams_distribute_simd:
case OMPD_target_teams_distribute_parallel_for:
case OMPD_target_teams_distribute_parallel_for_simd:
case OMPD_parallel:
case OMPD_for:
case OMPD_parallel_for:
case OMPD_parallel_sections:
case OMPD_for_simd:
case OMPD_parallel_for_simd:
case OMPD_cancel:
case OMPD_cancellation_point:
case OMPD_ordered:
case OMPD_threadprivate:
case OMPD_task:
case OMPD_simd:
case OMPD_sections:
case OMPD_section:
case OMPD_single:
case OMPD_master:
case OMPD_critical:
case OMPD_taskyield:
case OMPD_barrier:
case OMPD_taskwait:
case OMPD_taskgroup:
case OMPD_atomic:
case OMPD_flush:
case OMPD_teams:
case OMPD_target_data:
case OMPD_target_exit_data:
case OMPD_target_enter_data:
case OMPD_distribute:
case OMPD_distribute_simd:
case OMPD_distribute_parallel_for:
case OMPD_distribute_parallel_for_simd:
case OMPD_teams_distribute:
case OMPD_teams_distribute_simd:
case OMPD_teams_distribute_parallel_for:
case OMPD_teams_distribute_parallel_for_simd:
case OMPD_target_update:
case OMPD_declare_simd:
case OMPD_declare_target:
case OMPD_end_declare_target:
case OMPD_declare_reduction:
case OMPD_taskloop:
case OMPD_taskloop_simd:
case OMPD_unknown:
llvm_unreachable("Unexpected directive.");
}
}
return false;
}
/// Checks if the construct supports lightweight runtime. It must be SPMD
/// construct + inner loop-based construct with static scheduling.
static bool supportsLightweightRuntime(ASTContext &Ctx,
const OMPExecutableDirective &D) {
if (!supportsSPMDExecutionMode(Ctx, D))
return false;
OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
switch (DirectiveKind) {
case OMPD_target:
case OMPD_target_teams:
case OMPD_target_parallel:
return hasNestedLightweightDirective(Ctx, D);
case OMPD_target_parallel_for:
case OMPD_target_parallel_for_simd:
case OMPD_target_teams_distribute_parallel_for:
case OMPD_target_teams_distribute_parallel_for_simd:
// (Last|First)-privates must be shared in parallel region.
return hasStaticScheduling(D);
case OMPD_target_simd:
case OMPD_target_teams_distribute:
case OMPD_target_teams_distribute_simd: case OMPD_target_teams_distribute_simd:
return false; return false;
case OMPD_parallel: case OMPD_parallel:
@ -1010,18 +1218,20 @@ void CGOpenMPRuntimeNVPTX::emitSPMDEntryHeader(
EST.ExitBB = CGF.createBasicBlock(".exit"); EST.ExitBB = CGF.createBasicBlock(".exit");
// Initialize the OMP state in the runtime; called by all active threads. // Initialize the OMP state in the runtime; called by all active threads.
// TODO: Set RequiresOMPRuntime and RequiresDataSharing parameters bool RequiresFullRuntime = !supportsLightweightRuntime(CGF.getContext(), D);
// based on code analysis of the target region. llvm::Value *Args[] = {
llvm::Value *Args[] = {getThreadLimit(CGF, /*IsInSPMDExecutionMode=*/true), getThreadLimit(CGF, /*IsInSPMDExecutionMode=*/true),
/*RequiresOMPRuntime=*/Bld.getInt16(1), /*RequiresOMPRuntime=*/
/*RequiresDataSharing=*/Bld.getInt16(1)}; Bld.getInt16(RequiresFullRuntime ? 1 : 0),
/*RequiresDataSharing=*/Bld.getInt16(RequiresFullRuntime ? 1 : 0)};
CGF.EmitRuntimeCall( CGF.EmitRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_init), Args); createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_init), Args);
if (RequiresFullRuntime) {
// For data sharing, we need to initialize the stack. // For data sharing, we need to initialize the stack.
CGF.EmitRuntimeCall( CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
createNVPTXRuntimeFunction(
OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd)); OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd));
}
CGF.EmitBranch(ExecuteBB); CGF.EmitBranch(ExecuteBB);
@ -1414,7 +1624,8 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
/// Build void __kmpc_data_sharing_init_stack_spmd(); /// Build void __kmpc_data_sharing_init_stack_spmd();
auto *FnTy = auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false); llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack_spmd"); RTLFn =
CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack_spmd");
break; break;
} }
case OMPRTL_NVPTX__kmpc_data_sharing_push_stack: { case OMPRTL_NVPTX__kmpc_data_sharing_push_stack: {
@ -1607,6 +1818,7 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction(
.emitGenericVarsEpilog(CGF); .emitGenericVarsEpilog(CGF);
} }
} Action(Loc); } Action(Loc);
if (getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD)
CodeGen.setAction(Action); CodeGen.setAction(Action);
llvm::Value *OutlinedFunVal = CGOpenMPRuntime::emitTeamsOutlinedFunction( llvm::Value *OutlinedFunVal = CGOpenMPRuntime::emitTeamsOutlinedFunction(
D, ThreadIDVar, InnermostKind, CodeGen); D, ThreadIDVar, InnermostKind, CodeGen);
@ -1640,19 +1852,61 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
unsigned GlobalRecordSize = unsigned GlobalRecordSize =
CGM.getContext().getTypeSizeInChars(RecTy).getQuantity(); CGM.getContext().getTypeSizeInChars(RecTy).getQuantity();
GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment); GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment);
llvm::Value *GlobalRecCastAddr;
if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown) {
llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit");
llvm::BasicBlock *SPMDBB = CGF.createBasicBlock(".spmd");
llvm::BasicBlock *NonSPMDBB = CGF.createBasicBlock(".non-spmd");
llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode)));
Bld.CreateCondBr(IsSPMD, SPMDBB, NonSPMDBB);
// There is no need to emit line number for unconditional branch.
(void)ApplyDebugLocation::CreateEmpty(CGF);
CGF.EmitBlock(SPMDBB);
Address RecPtr = CGF.CreateMemTemp(RecTy, "_local_stack");
CGF.EmitBranch(ExitBB);
// There is no need to emit line number for unconditional branch.
(void)ApplyDebugLocation::CreateEmpty(CGF);
CGF.EmitBlock(NonSPMDBB);
// TODO: allow the usage of shared memory to be controlled by // TODO: allow the usage of shared memory to be controlled by
// the user, for now, default to global. // the user, for now, default to global.
llvm::Value *GlobalRecordSizeArg[] = { llvm::Value *GlobalRecordSizeArg[] = {
llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize), llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize),
CGF.Builder.getInt16(/*UseSharedMemory=*/0)}; CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall( llvm::Value *GlobalRecValue =
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_push_stack), CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
OMPRTL_NVPTX__kmpc_data_sharing_push_stack),
GlobalRecordSizeArg); GlobalRecordSizeArg);
llvm::Value *GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
GlobalRecValue, CGF.ConvertTypeForMem(RecTy)->getPointerTo()); GlobalRecValue, CGF.ConvertTypeForMem(RecTy)->getPointerTo());
CGF.EmitBlock(ExitBB);
auto *Phi = Bld.CreatePHI(GlobalRecCastAddr->getType(),
/*NumReservedValues=*/2, "_select_stack");
Phi->addIncoming(RecPtr.getPointer(), SPMDBB);
Phi->addIncoming(GlobalRecCastAddr, NonSPMDBB);
GlobalRecCastAddr = Phi;
I->getSecond().GlobalRecordAddr = Phi;
I->getSecond().IsInSPMDModeFlag = IsSPMD;
} else {
assert(getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_NonSPMD &&
"Expected Non-SPMD construct.");
// TODO: allow the usage of shared memory to be controlled by
// the user, for now, default to global.
llvm::Value *GlobalRecordSizeArg[] = {
llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize),
CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
llvm::Value *GlobalRecValue =
CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
OMPRTL_NVPTX__kmpc_data_sharing_push_stack),
GlobalRecordSizeArg);
GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
GlobalRecValue, CGF.ConvertTypeForMem(RecTy)->getPointerTo());
I->getSecond().GlobalRecordAddr = GlobalRecValue;
I->getSecond().IsInSPMDModeFlag = nullptr;
}
LValue Base = LValue Base =
CGF.MakeNaturalAlignPointeeAddrLValue(GlobalRecCastAddr, RecTy); CGF.MakeNaturalAlignPointeeAddrLValue(GlobalRecCastAddr, RecTy);
I->getSecond().GlobalRecordAddr = GlobalRecValue;
// Emit the "global alloca" which is a GEP from the global declaration // Emit the "global alloca" which is a GEP from the global declaration
// record using the pointer returned by the runtime. // record using the pointer returned by the runtime.
@ -1724,11 +1978,28 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsEpilog(CodeGenFunction &CGF) {
Addr); Addr);
} }
if (I->getSecond().GlobalRecordAddr) { if (I->getSecond().GlobalRecordAddr) {
if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown) {
CGBuilderTy &Bld = CGF.Builder;
llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit");
llvm::BasicBlock *NonSPMDBB = CGF.createBasicBlock(".non-spmd");
Bld.CreateCondBr(I->getSecond().IsInSPMDModeFlag, ExitBB, NonSPMDBB);
// There is no need to emit line number for unconditional branch.
(void)ApplyDebugLocation::CreateEmpty(CGF);
CGF.EmitBlock(NonSPMDBB);
CGF.EmitRuntimeCall( CGF.EmitRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_pop_stack), createNVPTXRuntimeFunction(
OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
CGF.EmitCastToVoidPtr(I->getSecond().GlobalRecordAddr));
CGF.EmitBlock(ExitBB);
} else {
assert(getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_NonSPMD &&
"Expected Non-SPMD mode.");
CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
I->getSecond().GlobalRecordAddr); I->getSecond().GlobalRecordAddr);
} }
} }
}
} }
void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF, void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF,

View File

@ -374,6 +374,7 @@ private:
llvm::SmallVector<llvm::Value *, 4> EscapedVariableLengthDeclsAddrs; llvm::SmallVector<llvm::Value *, 4> EscapedVariableLengthDeclsAddrs;
const RecordDecl *GlobalRecord = nullptr; const RecordDecl *GlobalRecord = nullptr;
llvm::Value *GlobalRecordAddr = nullptr; llvm::Value *GlobalRecordAddr = nullptr;
llvm::Value *IsInSPMDModeFlag = nullptr;
std::unique_ptr<CodeGenFunction::OMPMapVars> MappedParams; std::unique_ptr<CodeGenFunction::OMPMapVars> MappedParams;
}; };
/// Maps the function to the list of the globalized variables with their /// Maps the function to the list of the globalized variables with their

View File

@ -35,9 +35,19 @@ int maini1() {
// CHECK-NOT: @__kmpc_data_sharing_push_stack // CHECK-NOT: @__kmpc_data_sharing_push_stack
// CHECK: define {{.*}}[[BAR]]() // CHECK: define {{.*}}[[BAR]]()
// CHECK: [[STACK:%.+]] = alloca [[GLOBAL_ST:%.+]],
// CHECK: [[RES:%.+]] = call i8 @__kmpc_is_spmd_exec_mode()
// CHECK: [[IS_SPMD:%.+]] = icmp ne i8 [[RES]], 0
// CHECK: br i1 [[IS_SPMD]], label
// CHECK: br label
// CHECK: [[RES:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i64 4, i16 0) // CHECK: [[RES:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i64 4, i16 0)
// CHECK: [[GLOBALS:%.+]] = bitcast i8* [[RES]] to [[GLOBAL_ST:%struct[.].*]]* // CHECK: [[GLOBALS:%.+]] = bitcast i8* [[RES]] to [[GLOBAL_ST]]*
// CHECK: [[A_ADDR:%.+]] = getelementptr inbounds [[GLOBAL_ST]], [[GLOBAL_ST]]* [[GLOBALS]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: br label
// CHECK: [[ITEMS:%.+]] = phi [[GLOBAL_ST]]* [ [[STACK]], {{.+}} ], [ [[GLOBALS]], {{.+}} ]
// CHECK: [[A_ADDR:%.+]] = getelementptr inbounds [[GLOBAL_ST]], [[GLOBAL_ST]]* [[ITEMS]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
// CHECK: call {{.*}}[[FOO]](i32* dereferenceable{{.*}} [[A_ADDR]]) // CHECK: call {{.*}}[[FOO]](i32* dereferenceable{{.*}} [[A_ADDR]])
// CHECK: call void @__kmpc_data_sharing_pop_stack(i8* [[RES]]) // CHECK: br i1 [[IS_SPMD]], label
// CHECK: [[BC:%.+]] = bitcast [[GLOBAL_ST]]* [[ITEMS]] to i8*
// CHECK: call void @__kmpc_data_sharing_pop_stack(i8* [[BC]])
// CHECK: br label
// CHECK: ret i32 // CHECK: ret i32

View File

@ -0,0 +1,328 @@
// Test target codegen - host bc file has to be created first.
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s
// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
// CHECK-NOT: @__omp_offloading_{{.+}}_exec_mode = weak constant i8 1
// CHECK: @__omp_offloading_{{.+}}_l52_exec_mode = weak constant i8 1
// CHECK-NOT: @__omp_offloading_{{.+}}_exec_mode = weak constant i8 1
void foo() {
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
#pragma omp target teams distribute parallel for simd
for (int i = 0; i < 10; ++i)
;
#pragma omp target teams distribute parallel for simd schedule(static)
for (int i = 0; i < 10; ++i)
;
#pragma omp target teams distribute parallel for simd schedule(static, 1)
for (int i = 0; i < 10; ++i)
;
#pragma omp target teams distribute parallel for simd schedule(auto)
for (int i = 0; i < 10; ++i)
;
#pragma omp target teams distribute parallel for simd schedule(runtime)
for (int i = 0; i < 10; ++i)
;
#pragma omp target teams distribute parallel for simd schedule(dynamic)
for (int i = 0; i < 10; ++i)
;
#pragma omp target teams distribute parallel for simd schedule(guided)
for (int i = 0; i < 10; ++i)
;
int a;
// CHECK: call void @__kmpc_kernel_init(
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
#pragma omp target teams distribute parallel for lastprivate(a)
for (int i = 0; i < 10; ++i)
a = i;
#pragma omp target teams distribute parallel for schedule(static)
for (int i = 0; i < 10; ++i)
;
#pragma omp target teams distribute parallel for schedule(static, 1)
for (int i = 0; i < 10; ++i)
;
#pragma omp target teams distribute parallel for schedule(auto)
for (int i = 0; i < 10; ++i)
;
#pragma omp target teams distribute parallel for schedule(runtime)
for (int i = 0; i < 10; ++i)
;
#pragma omp target teams distribute parallel for schedule(dynamic)
for (int i = 0; i < 10; ++i)
;
#pragma omp target teams distribute parallel for schedule(guided)
for (int i = 0; i < 10; ++i)
;
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
#pragma omp target teams
#pragma omp distribute parallel for simd
for (int i = 0; i < 10; ++i)
;
#pragma omp target teams
#pragma omp distribute parallel for simd schedule(static)
for (int i = 0; i < 10; ++i)
;
#pragma omp target teams
#pragma omp distribute parallel for simd schedule(static, 1)
for (int i = 0; i < 10; ++i)
;
#pragma omp target teams
#pragma omp distribute parallel for simd schedule(auto)
for (int i = 0; i < 10; ++i)
;
#pragma omp target teams
#pragma omp distribute parallel for simd schedule(runtime)
for (int i = 0; i < 10; ++i)
;
#pragma omp target teams
#pragma omp distribute parallel for simd schedule(dynamic)
for (int i = 0; i < 10; ++i)
;
#pragma omp target teams
#pragma omp distribute parallel for simd schedule(guided)
for (int i = 0; i < 10; ++i)
;
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
#pragma omp target teams
#pragma omp distribute parallel for
for (int i = 0; i < 10; ++i)
;
#pragma omp target teams
#pragma omp distribute parallel for schedule(static)
for (int i = 0; i < 10; ++i)
;
#pragma omp target teams
#pragma omp distribute parallel for schedule(static, 1)
for (int i = 0; i < 10; ++i)
;
#pragma omp target teams
#pragma omp distribute parallel for schedule(auto)
for (int i = 0; i < 10; ++i)
;
#pragma omp target teams
#pragma omp distribute parallel for schedule(runtime)
for (int i = 0; i < 10; ++i)
;
#pragma omp target teams
#pragma omp distribute parallel for schedule(dynamic)
for (int i = 0; i < 10; ++i)
;
#pragma omp target teams
#pragma omp distribute parallel for schedule(guided)
for (int i = 0; i < 10; ++i)
;
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
#pragma omp target
#pragma omp teams
#pragma omp distribute parallel for
for (int i = 0; i < 10; ++i)
;
#pragma omp target
#pragma omp teams
#pragma omp distribute parallel for schedule(static)
for (int i = 0; i < 10; ++i)
;
#pragma omp target
#pragma omp teams
#pragma omp distribute parallel for schedule(static, 1)
for (int i = 0; i < 10; ++i)
;
#pragma omp target
#pragma omp teams
#pragma omp distribute parallel for schedule(auto)
for (int i = 0; i < 10; ++i)
;
#pragma omp target
#pragma omp teams
#pragma omp distribute parallel for schedule(runtime)
for (int i = 0; i < 10; ++i)
;
#pragma omp target
#pragma omp teams
#pragma omp distribute parallel for schedule(dynamic)
for (int i = 0; i < 10; ++i)
;
#pragma omp target
#pragma omp teams
#pragma omp distribute parallel for schedule(guided)
for (int i = 0; i < 10; ++i)
;
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
#pragma omp target parallel for
for (int i = 0; i < 10; ++i)
;
#pragma omp target parallel for schedule(static)
for (int i = 0; i < 10; ++i)
;
#pragma omp target parallel for schedule(static, 1)
for (int i = 0; i < 10; ++i)
;
#pragma omp target parallel for schedule(auto)
for (int i = 0; i < 10; ++i)
;
#pragma omp target parallel for schedule(runtime)
for (int i = 0; i < 10; ++i)
;
#pragma omp target parallel for schedule(dynamic)
for (int i = 0; i < 10; ++i)
;
#pragma omp target parallel for schedule(guided)
for (int i = 0; i < 10; ++i)
;
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
#pragma omp target parallel
#pragma omp for simd
for (int i = 0; i < 10; ++i)
;
#pragma omp target parallel
#pragma omp for simd schedule(static)
for (int i = 0; i < 10; ++i)
;
#pragma omp target parallel
#pragma omp for simd schedule(static, 1)
for (int i = 0; i < 10; ++i)
;
#pragma omp target parallel
#pragma omp for simd schedule(auto)
for (int i = 0; i < 10; ++i)
;
#pragma omp target parallel
#pragma omp for simd schedule(runtime)
for (int i = 0; i < 10; ++i)
;
#pragma omp target parallel
#pragma omp for simd schedule(dynamic)
for (int i = 0; i < 10; ++i)
;
#pragma omp target parallel
#pragma omp for simd schedule(guided)
for (int i = 0; i < 10; ++i)
;
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
#pragma omp target
#pragma omp parallel
#pragma omp for simd ordered
for (int i = 0; i < 10; ++i)
;
#pragma omp target
#pragma omp parallel
#pragma omp for simd schedule(static)
for (int i = 0; i < 10; ++i)
;
#pragma omp target
#pragma omp parallel
#pragma omp for simd schedule(static, 1)
for (int i = 0; i < 10; ++i)
;
#pragma omp target
#pragma omp parallel
#pragma omp for simd schedule(auto)
for (int i = 0; i < 10; ++i)
;
#pragma omp target
#pragma omp parallel
#pragma omp for simd schedule(runtime)
for (int i = 0; i < 10; ++i)
;
#pragma omp target
#pragma omp parallel
#pragma omp for simd schedule(dynamic)
for (int i = 0; i < 10; ++i)
;
#pragma omp target
#pragma omp parallel
#pragma omp for simd schedule(guided)
for (int i = 0; i < 10; ++i)
;
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
#pragma omp target
#pragma omp parallel for
for (int i = 0; i < 10; ++i)
;
#pragma omp target
#pragma omp parallel for schedule(static)
for (int i = 0; i < 10; ++i)
;
#pragma omp target
#pragma omp parallel for schedule(static, 1)
for (int i = 0; i < 10; ++i)
;
#pragma omp target
#pragma omp parallel for schedule(auto)
for (int i = 0; i < 10; ++i)
;
#pragma omp target
#pragma omp parallel for schedule(runtime)
for (int i = 0; i < 10; ++i)
;
#pragma omp target
#pragma omp parallel for schedule(dynamic)
for (int i = 0; i < 10; ++i)
;
#pragma omp target
#pragma omp parallel for schedule(guided)
for (int i = 0; i < 10; ++i)
;
}
#endif

View File

@ -554,13 +554,20 @@ int baz(int f, double &a) {
// CHECK: ret void // CHECK: ret void
// CHECK: define i32 [[BAZ]](i32 [[F:%.*]], double* dereferenceable{{.*}}) // CHECK: define i32 [[BAZ]](i32 [[F:%.*]], double* dereferenceable{{.*}})
// CHECK: [[STACK:%.+]] = alloca [[GLOBAL_ST:%.+]],
// CHECK: [[ZERO_ADDR:%.+]] = alloca i32, // CHECK: [[ZERO_ADDR:%.+]] = alloca i32,
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t*
// CHECK: [[GTID_ADDR:%.+]] = alloca i32, // CHECK: [[GTID_ADDR:%.+]] = alloca i32,
// CHECK: store i32 0, i32* [[ZERO_ADDR]] // CHECK: store i32 0, i32* [[ZERO_ADDR]]
// CHECK: [[RES:%.+]] = call i8 @__kmpc_is_spmd_exec_mode()
// CHECK: [[IS_SPMD:%.+]] = icmp ne i8 [[RES]], 0
// CHECK: br i1 [[IS_SPMD]], label
// CHECK: br label
// CHECK: [[PTR:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{64|32}} 4, i16 0) // CHECK: [[PTR:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{64|32}} 4, i16 0)
// CHECK: [[REC_ADDR:%.+]] = bitcast i8* [[PTR]] to %struct._globalized_locals_ty* // CHECK: [[REC_ADDR:%.+]] = bitcast i8* [[PTR]] to [[GLOBAL_ST]]*
// CHECK: [[F_PTR:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[REC_ADDR]], i32 0, i32 0 // CHECK: br label
// CHECK: [[ITEMS:%.+]] = phi [[GLOBAL_ST]]* [ [[STACK]], {{.+}} ], [ [[REC_ADDR]], {{.+}} ]
// CHECK: [[F_PTR:%.+]] = getelementptr inbounds [[GLOBAL_ST]], [[GLOBAL_ST]]* [[ITEMS]], i32 0, i32 0
// CHECK: store i32 %{{.+}}, i32* [[F_PTR]], // CHECK: store i32 %{{.+}}, i32* [[F_PTR]],
// CHECK: [[RES:%.+]] = call i8 @__kmpc_is_spmd_exec_mode() // CHECK: [[RES:%.+]] = call i8 @__kmpc_is_spmd_exec_mode()
@ -595,7 +602,12 @@ int baz(int f, double &a) {
// CHECK: br label // CHECK: br label
// CHECK: [[RES:%.+]] = load i32, i32* [[F_PTR]], // CHECK: [[RES:%.+]] = load i32, i32* [[F_PTR]],
// CHECK: call void @__kmpc_data_sharing_pop_stack(i8* [[PTR]]) // CHECK: store i32 [[RES]], i32* [[RET:%.+]],
// CHECK: br i1 [[IS_SPMD]], label
// CHECK: [[BC:%.+]] = bitcast [[GLOBAL_ST]]* [[ITEMS]] to i8*
// CHECK: call void @__kmpc_data_sharing_pop_stack(i8* [[BC]])
// CHECK: br label
// CHECK: [[RES:%.+]] = load i32, i32* [[RET]],
// CHECK: ret i32 [[RES]] // CHECK: ret i32 [[RES]]

View File

@ -59,7 +59,7 @@ int bar(int n){
// CHECK: store i16* {{%.+}}, i16** [[AA_ADDR]], align // CHECK: store i16* {{%.+}}, i16** [[AA_ADDR]], align
// CHECK: [[AA:%.+]] = load i16*, i16** [[AA_ADDR]], align // CHECK: [[AA:%.+]] = load i16*, i16** [[AA_ADDR]], align
// CHECK: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 1, i16 1)
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd // CHECK: call void @__kmpc_data_sharing_init_stack_spmd
// CHECK: br label {{%?}}[[EXEC:.+]] // CHECK: br label {{%?}}[[EXEC:.+]]
// //
@ -102,7 +102,7 @@ int bar(int n){
// CHECK: [[AA:%.+]] = load i16*, i16** [[AA_ADDR]], align // CHECK: [[AA:%.+]] = load i16*, i16** [[AA_ADDR]], align
// CHECK: [[B:%.+]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align // CHECK: [[B:%.+]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align
// CHECK: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 1, i16 1)
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd // CHECK: call void @__kmpc_data_sharing_init_stack_spmd
// CHECK: br label {{%?}}[[EXEC:.+]] // CHECK: br label {{%?}}[[EXEC:.+]]
// //

View File

@ -47,7 +47,7 @@ int bar(int n){
} }
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l22}}( // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l22}}(
// CHECK: call void @__kmpc_spmd_kernel_init( // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1)
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd // CHECK: call void @__kmpc_data_sharing_init_stack_spmd
// CHECK: br label {{%?}}[[EXEC:.+]] // CHECK: br label {{%?}}[[EXEC:.+]]
// //
@ -69,7 +69,7 @@ int bar(int n){
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l26}}( // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l26}}(
// CHECK: call void @__kmpc_spmd_kernel_init( // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1)
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd // CHECK: call void @__kmpc_data_sharing_init_stack_spmd
// CHECK: br label {{%?}}[[EXEC:.+]] // CHECK: br label {{%?}}[[EXEC:.+]]
// //
@ -90,7 +90,7 @@ int bar(int n){
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l31}}( // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l31}}(
// CHECK: call void @__kmpc_spmd_kernel_init( // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1)
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd // CHECK: call void @__kmpc_data_sharing_init_stack_spmd
// CHECK: br label {{%?}}[[EXEC:.+]] // CHECK: br label {{%?}}[[EXEC:.+]]
// //

View File

@ -54,7 +54,7 @@ int bar(int n){
// CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l27}}( // CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l27}}(
// //
// CHECK: call void @__kmpc_spmd_kernel_init( // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1)
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd // CHECK: call void @__kmpc_data_sharing_init_stack_spmd
// CHECK: br label {{%?}}[[EXECUTE:.+]] // CHECK: br label {{%?}}[[EXECUTE:.+]]
// //
@ -242,7 +242,7 @@ int bar(int n){
// CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l32}}( // CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l32}}(
// //
// CHECK: call void @__kmpc_spmd_kernel_init( // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1)
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd // CHECK: call void @__kmpc_data_sharing_init_stack_spmd
// CHECK: br label {{%?}}[[EXECUTE:.+]] // CHECK: br label {{%?}}[[EXECUTE:.+]]
// //
@ -520,7 +520,7 @@ int bar(int n){
// CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l38}}( // CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l38}}(
// //
// CHECK: call void @__kmpc_spmd_kernel_init( // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1)
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd // CHECK: call void @__kmpc_data_sharing_init_stack_spmd
// CHECK: br label {{%?}}[[EXECUTE:.+]] // CHECK: br label {{%?}}[[EXECUTE:.+]]
// //

View File

@ -227,13 +227,13 @@ int bar(int n){
// CHECK: ret void // CHECK: ret void
// CHECK: define weak void @__omp_offloading_{{.*}}ftemplate{{.*}}_l37( // CHECK: define weak void @__omp_offloading_{{.*}}ftemplate{{.*}}_l37(
// CHECK: call void @__kmpc_spmd_kernel_init( // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1)
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd // CHECK: call void @__kmpc_data_sharing_init_stack_spmd
// CHECK: call i8* @__kmpc_data_sharing_push_stack( // CHECK-NOT: call i8* @__kmpc_data_sharing_push_stack(
// CHECK-NOT: call void @__kmpc_serialized_parallel( // CHECK-NOT: call void @__kmpc_serialized_parallel(
// CHECK: call void [[L0:@.+]](i32* %{{.+}}, i32* %{{.+}}, i16* %{{.*}}) // CHECK: call void [[L0:@.+]](i32* %{{.+}}, i32* %{{.+}}, i16* %{{.*}})
// CHECK-NOT: call void @__kmpc_end_serialized_parallel( // CHECK-NOT: call void @__kmpc_end_serialized_parallel(
// CHECK: call void @__kmpc_data_sharing_pop_stack( // CHECK-NOT: call void @__kmpc_data_sharing_pop_stack(
// CHECK: call void @__kmpc_spmd_kernel_deinit() // CHECK: call void @__kmpc_spmd_kernel_deinit()
// CHECK: ret // CHECK: ret

View File

@ -8,12 +8,13 @@
#ifndef HEADER #ifndef HEADER
#define HEADER #define HEADER
// Check that the execution mode of all 2 target regions on the gpu is set to SPMD Mode. // Check that the execution mode of the target region with lastprivates on the gpu is set to Non-SPMD Mode.
// CHECK-DAG: {{@__omp_offloading_.+l32}}_exec_mode = weak constant i8 0 // CHECK-DAG: {{@__omp_offloading_.+l33}}_exec_mode = weak constant i8 1
// CHECK-DAG: {{@__omp_offloading_.+l38}}_exec_mode = weak constant i8 0 // Check that the execution mode of all 4 target regions on the gpu is set to SPMD Mode.
// CHECK-DAG: {{@__omp_offloading_.+l43}}_exec_mode = weak constant i8 0 // CHECK-DAG: {{@__omp_offloading_.+l39}}_exec_mode = weak constant i8 0
// CHECK-DAG: {{@__omp_offloading_.+l48}}_exec_mode = weak constant i8 0 // CHECK-DAG: {{@__omp_offloading_.+l44}}_exec_mode = weak constant i8 0
// CHECK-DAG: {{@__omp_offloading_.+l56}}_exec_mode = weak constant i8 0 // CHECK-DAG: {{@__omp_offloading_.+l49}}_exec_mode = weak constant i8 0
// CHECK-DAG: {{@__omp_offloading_.+l57}}_exec_mode = weak constant i8 0
#define N 1000 #define N 1000
#define M 10 #define M 10
@ -67,14 +68,14 @@ int bar(int n){
return a; return a;
} }
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}( // CHECK_LABEL: define internal void @__omp_offloading_{{.+}}_l33_worker()
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l33(
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd // CHECK: call void @__kmpc_kernel_init(i32 %{{.+}}, i16 1)
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
// CHECK: {{call|invoke}} void [[OUTL1:@.+]]( // CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[OUTL1:@__omp_outlined.*]]_wrapper to i8*), i16 1)
// CHECK: call void @__kmpc_for_static_fini( // CHECK: call void @__kmpc_for_static_fini(
// CHECK: call void @__kmpc_spmd_kernel_deinit() // CHECK: call void @__kmpc_kernel_deinit(i16 1)
// CHECK: ret void // CHECK: ret void
// CHECK: define internal void [[OUTL1]]( // CHECK: define internal void [[OUTL1]](
@ -84,8 +85,7 @@ int bar(int n){
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}( // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
// CHECK: {{call|invoke}} void [[OUTL2:@.+]]( // CHECK: {{call|invoke}} void [[OUTL2:@.+]](
// CHECK: call void @__kmpc_for_static_fini( // CHECK: call void @__kmpc_for_static_fini(
@ -99,8 +99,7 @@ int bar(int n){
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}( // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
// CHECK: {{call|invoke}} void [[OUTL3:@.+]]( // CHECK: {{call|invoke}} void [[OUTL3:@.+]](
// CHECK: call void @__kmpc_for_static_fini( // CHECK: call void @__kmpc_for_static_fini(
@ -115,8 +114,7 @@ int bar(int n){
// CHECK: define {{.*}}void {{@__omp_offloading_.+}}({{.+}}, i{{32|64}} [[F_IN:%.+]]) // CHECK: define {{.*}}void {{@__omp_offloading_.+}}({{.+}}, i{{32|64}} [[F_IN:%.+]])
// CHECK: store {{.+}} [[F_IN]], {{.+}}* {{.+}}, // CHECK: store {{.+}} [[F_IN]], {{.+}}* {{.+}},
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
// CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align // CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]], // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]],
// CHECK: {{call|invoke}} void [[OUTL4:@.+]]( // CHECK: {{call|invoke}} void [[OUTL4:@.+]](
@ -129,7 +127,7 @@ int bar(int n){
// CHECK: call void @__kmpc_for_static_fini( // CHECK: call void @__kmpc_for_static_fini(
// CHECK: ret void // CHECK: ret void
// CHECK: define weak void @__omp_offloading_{{.*}}_l56(i[[SZ:64|32]] %{{[^,]+}}, [1000 x i32]* dereferenceable{{.*}}, i32* %{{[^)]+}}) // CHECK: define weak void @__omp_offloading_{{.*}}_l57(i[[SZ:64|32]] %{{[^,]+}}, [1000 x i32]* dereferenceable{{.*}}, i32* %{{[^)]+}})
// CHECK: call void [[OUTLINED:@__omp_outlined.*]](i32* %{{.+}}, i32* %{{.+}}, i[[SZ]] %{{.*}}, i[[SZ]] %{{.*}}, i[[SZ]] %{{.*}}, [1000 x i32]* %{{.*}}, i32* %{{.*}}) // CHECK: call void [[OUTLINED:@__omp_outlined.*]](i32* %{{.+}}, i32* %{{.+}}, i[[SZ]] %{{.*}}, i[[SZ]] %{{.*}}, i[[SZ]] %{{.*}}, [1000 x i32]* %{{.*}}, i32* %{{.*}})
// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.*}}, i32* noalias %{{.*}} i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [1000 x i32]* dereferenceable{{.*}}, i32* %{{.*}}) // CHECK: define internal void [[OUTLINED]](i32* noalias %{{.*}}, i32* noalias %{{.*}} i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [1000 x i32]* dereferenceable{{.*}}, i32* %{{.*}})

View File

@ -8,11 +8,12 @@
#ifndef HEADER #ifndef HEADER
#define HEADER #define HEADER
// Check that the execution mode of all 2 target regions on the gpu is set to SPMD Mode. // Check that the execution mode of the target region with lastprivates on the gpu is set to Non-SPMD Mode.
// CHECK-DAG: {{@__omp_offloading_.+l30}}_exec_mode = weak constant i8 0 // CHECK-DAG: {{@__omp_offloading_.+l31}}_exec_mode = weak constant i8 1
// CHECK-DAG: {{@__omp_offloading_.+l36}}_exec_mode = weak constant i8 0 // Check that the execution mode of all 3 target regions on the gpu is set to SPMD Mode.
// CHECK-DAG: {{@__omp_offloading_.+l41}}_exec_mode = weak constant i8 0 // CHECK-DAG: {{@__omp_offloading_.+l37}}_exec_mode = weak constant i8 0
// CHECK-DAG: {{@__omp_offloading_.+l46}}_exec_mode = weak constant i8 0 // CHECK-DAG: {{@__omp_offloading_.+l42}}_exec_mode = weak constant i8 0
// CHECK-DAG: {{@__omp_offloading_.+l47}}_exec_mode = weak constant i8 0
#define N 1000 #define N 1000
#define M 10 #define M 10
@ -62,14 +63,14 @@ int bar(int n){
return a; return a;
} }
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}( // CHECK_LABEL: define internal void @__omp_offloading_{{.+}}_l31_worker()
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l31(
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd // CHECK: call void @__kmpc_kernel_init(i32 %{{.+}}, i16 1)
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
// CHECK: {{call|invoke}} void [[OUTL1:@.+]]( // CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[OUTL1:@__omp_outlined.*]]_wrapper to i8*), i16 1)
// CHECK: call void @__kmpc_for_static_fini( // CHECK: call void @__kmpc_for_static_fini(
// CHECK: call void @__kmpc_spmd_kernel_deinit() // CHECK: call void @__kmpc_kernel_deinit(i16 1)
// CHECK: ret void // CHECK: ret void
// CHECK: define internal void [[OUTL1]]( // CHECK: define internal void [[OUTL1]](
@ -79,8 +80,7 @@ int bar(int n){
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}( // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
// CHECK: {{call|invoke}} void [[OUTL2:@.+]]( // CHECK: {{call|invoke}} void [[OUTL2:@.+]](
// CHECK: call void @__kmpc_for_static_fini( // CHECK: call void @__kmpc_for_static_fini(
@ -94,8 +94,7 @@ int bar(int n){
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}( // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
// CHECK: {{call|invoke}} void [[OUTL3:@.+]]( // CHECK: {{call|invoke}} void [[OUTL3:@.+]](
// CHECK: call void @__kmpc_for_static_fini( // CHECK: call void @__kmpc_for_static_fini(
@ -110,8 +109,7 @@ int bar(int n){
// CHECK: define {{.*}}void {{@__omp_offloading_.+}}({{.+}}, i{{32|64}} [[F_IN:%.+]]) // CHECK: define {{.*}}void {{@__omp_offloading_.+}}({{.+}}, i{{32|64}} [[F_IN:%.+]])
// CHECK: store {{.+}} [[F_IN]], {{.+}}* {{.+}}, // CHECK: store {{.+}} [[F_IN]], {{.+}}* {{.+}},
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
// CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align // CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]], // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]],
// CHECK: {{call|invoke}} void [[OUTL4:@.+]]( // CHECK: {{call|invoke}} void [[OUTL4:@.+]](