forked from OSchip/llvm-project
[OPENMP][NVPTX]Extend number of constructs executed in SPMD mode.
If the statements between target|teams|distribute directives do not require execution in the master thread — e.g. constant expressions, null statements, simple declarations, etc. — such a construct can be executed in SPMD mode. llvm-svn: 346551
This commit is contained in:
parent
c025f09ee4
commit
8bcc69c054
|
@ -698,12 +698,58 @@ getDataSharingMode(CodeGenModule &CGM) {
|
|||
: CGOpenMPRuntimeNVPTX::Generic;
|
||||
}
|
||||
|
||||
// Checks if the expression is constant or does not have non-trivial function
|
||||
// calls.
|
||||
static bool isTrivial(ASTContext &Ctx, const Expr * E) {
|
||||
// We can skip constant expressions.
|
||||
// We can skip expressions with trivial calls or simple expressions.
|
||||
return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
|
||||
!E->hasNonTrivialCall(Ctx)) &&
|
||||
!E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
|
||||
}
|
||||
|
||||
/// Checks if the \p Body is the \a CompoundStmt and returns its child statement
|
||||
/// iff there is only one.
|
||||
static const Stmt *getSingleCompoundChild(const Stmt *Body) {
|
||||
if (const auto *C = dyn_cast<CompoundStmt>(Body))
|
||||
if (C->size() == 1)
|
||||
return C->body_front();
|
||||
/// iff there is only one that is not evaluatable at the compile time.
|
||||
static const Stmt *getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body) {
|
||||
if (const auto *C = dyn_cast<CompoundStmt>(Body)) {
|
||||
const Stmt *Child = nullptr;
|
||||
for (const Stmt *S : C->body()) {
|
||||
if (const auto *E = dyn_cast<Expr>(S)) {
|
||||
if (isTrivial(Ctx, E))
|
||||
continue;
|
||||
}
|
||||
// Some of the statements can be ignored.
|
||||
if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
|
||||
isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
|
||||
continue;
|
||||
// Analyze declarations.
|
||||
if (const auto *DS = dyn_cast<DeclStmt>(S)) {
|
||||
if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
|
||||
if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
|
||||
isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
|
||||
isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
|
||||
isa<UsingDirectiveDecl>(D) ||
|
||||
isa<OMPDeclareReductionDecl>(D) ||
|
||||
isa<OMPThreadPrivateDecl>(D))
|
||||
return true;
|
||||
const auto *VD = dyn_cast<VarDecl>(D);
|
||||
if (!VD)
|
||||
return false;
|
||||
return VD->isConstexpr() ||
|
||||
((VD->getType().isTrivialType(Ctx) ||
|
||||
VD->getType()->isReferenceType()) &&
|
||||
(!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
|
||||
}))
|
||||
continue;
|
||||
}
|
||||
// Found multiple children - cannot get the one child only.
|
||||
if (Child)
|
||||
return Body;
|
||||
Child = S;
|
||||
}
|
||||
if (Child)
|
||||
return Child;
|
||||
}
|
||||
return Body;
|
||||
}
|
||||
|
||||
|
@ -732,7 +778,7 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
|
|||
const auto *CS = D.getInnermostCapturedStmt();
|
||||
const auto *Body =
|
||||
CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
|
||||
const Stmt *ChildStmt = getSingleCompoundChild(Body);
|
||||
const Stmt *ChildStmt = getSingleCompoundChild(Ctx, Body);
|
||||
|
||||
if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
|
||||
OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
|
||||
|
@ -746,7 +792,7 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
|
|||
/*IgnoreCaptured=*/true);
|
||||
if (!Body)
|
||||
return false;
|
||||
ChildStmt = getSingleCompoundChild(Body);
|
||||
ChildStmt = getSingleCompoundChild(Ctx, Body);
|
||||
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
|
||||
DKind = NND->getDirectiveKind();
|
||||
if (isOpenMPParallelDirective(DKind) &&
|
||||
|
@ -905,7 +951,7 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx,
|
|||
const auto *CS = D.getInnermostCapturedStmt();
|
||||
const auto *Body =
|
||||
CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
|
||||
const Stmt *ChildStmt = getSingleCompoundChild(Body);
|
||||
const Stmt *ChildStmt = getSingleCompoundChild(Ctx, Body);
|
||||
|
||||
if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
|
||||
OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
|
||||
|
@ -920,7 +966,7 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx,
|
|||
/*IgnoreCaptured=*/true);
|
||||
if (!Body)
|
||||
return false;
|
||||
ChildStmt = getSingleCompoundChild(Body);
|
||||
ChildStmt = getSingleCompoundChild(Ctx, Body);
|
||||
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
|
||||
DKind = NND->getDirectiveKind();
|
||||
if (isOpenMPWorksharingDirective(DKind) &&
|
||||
|
@ -932,7 +978,7 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx,
|
|||
/*IgnoreCaptured=*/true);
|
||||
if (!Body)
|
||||
return false;
|
||||
ChildStmt = getSingleCompoundChild(Body);
|
||||
ChildStmt = getSingleCompoundChild(Ctx, Body);
|
||||
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
|
||||
DKind = NND->getDirectiveKind();
|
||||
if (isOpenMPParallelDirective(DKind) &&
|
||||
|
@ -944,7 +990,7 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx,
|
|||
/*IgnoreCaptured=*/true);
|
||||
if (!Body)
|
||||
return false;
|
||||
ChildStmt = getSingleCompoundChild(Body);
|
||||
ChildStmt = getSingleCompoundChild(Ctx, Body);
|
||||
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
|
||||
DKind = NND->getDirectiveKind();
|
||||
if (isOpenMPWorksharingDirective(DKind) &&
|
||||
|
@ -965,7 +1011,7 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx,
|
|||
/*IgnoreCaptured=*/true);
|
||||
if (!Body)
|
||||
return false;
|
||||
ChildStmt = getSingleCompoundChild(Body);
|
||||
ChildStmt = getSingleCompoundChild(Ctx, Body);
|
||||
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
|
||||
DKind = NND->getDirectiveKind();
|
||||
if (isOpenMPWorksharingDirective(DKind) &&
|
||||
|
@ -1287,10 +1333,6 @@ void CGOpenMPRuntimeNVPTX::emitSPMDKernel(const OMPExecutableDirective &D,
|
|||
IsInTTDRegion = false;
|
||||
}
|
||||
|
||||
static void
|
||||
getDistributeLastprivateVars(const OMPExecutableDirective &D,
|
||||
llvm::SmallVectorImpl<const ValueDecl *> &Vars);
|
||||
|
||||
void CGOpenMPRuntimeNVPTX::emitSPMDEntryHeader(
|
||||
CodeGenFunction &CGF, EntryFunctionState &EST,
|
||||
const OMPExecutableDirective &D) {
|
||||
|
@ -1303,33 +1345,10 @@ void CGOpenMPRuntimeNVPTX::emitSPMDEntryHeader(
|
|||
// Initialize the OMP state in the runtime; called by all active threads.
|
||||
bool RequiresFullRuntime = CGM.getLangOpts().OpenMPCUDAForceFullRuntime ||
|
||||
!supportsLightweightRuntime(CGF.getContext(), D);
|
||||
// Check if we have inner distribute + lastprivate|reduction clauses.
|
||||
bool RequiresDatasharing = RequiresFullRuntime;
|
||||
if (!RequiresDatasharing) {
|
||||
const OMPExecutableDirective *TD = &D;
|
||||
if (!isOpenMPTeamsDirective(TD->getDirectiveKind()) &&
|
||||
!isOpenMPParallelDirective(TD->getDirectiveKind())) {
|
||||
const Stmt *S = getSingleCompoundChild(
|
||||
TD->getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(
|
||||
/*IgnoreCaptured=*/true));
|
||||
TD = cast<OMPExecutableDirective>(S);
|
||||
}
|
||||
if (!isOpenMPDistributeDirective(TD->getDirectiveKind()) &&
|
||||
!isOpenMPParallelDirective(TD->getDirectiveKind())) {
|
||||
const Stmt *S = getSingleCompoundChild(
|
||||
TD->getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(
|
||||
/*IgnoreCaptured=*/true));
|
||||
TD = cast<OMPExecutableDirective>(S);
|
||||
}
|
||||
if (isOpenMPDistributeDirective(TD->getDirectiveKind()))
|
||||
RequiresDatasharing = TD->hasClausesOfKind<OMPLastprivateClause>() ||
|
||||
TD->hasClausesOfKind<OMPReductionClause>();
|
||||
}
|
||||
llvm::Value *Args[] = {
|
||||
getThreadLimit(CGF, /*IsInSPMDExecutionMode=*/true),
|
||||
llvm::Value *Args[] = {getThreadLimit(CGF, /*IsInSPMDExecutionMode=*/true),
|
||||
/*RequiresOMPRuntime=*/
|
||||
Bld.getInt16(RequiresFullRuntime ? 1 : 0),
|
||||
/*RequiresDataSharing=*/Bld.getInt16(RequiresDatasharing ? 1 : 0)};
|
||||
/*RequiresDataSharing=*/Bld.getInt16(0)};
|
||||
CGF.EmitRuntimeCall(
|
||||
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_init), Args);
|
||||
|
||||
|
@ -1928,13 +1947,14 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction(
|
|||
/// Get list of lastprivate variables from the teams distribute ... or
|
||||
/// teams {distribute ...} directives.
|
||||
static void
|
||||
getDistributeLastprivateVars(const OMPExecutableDirective &D,
|
||||
getDistributeLastprivateVars(ASTContext &Ctx, const OMPExecutableDirective &D,
|
||||
llvm::SmallVectorImpl<const ValueDecl *> &Vars) {
|
||||
assert(isOpenMPTeamsDirective(D.getDirectiveKind()) &&
|
||||
"expected teams directive.");
|
||||
const OMPExecutableDirective *Dir = &D;
|
||||
if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
|
||||
if (const Stmt *S = getSingleCompoundChild(
|
||||
Ctx,
|
||||
D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(
|
||||
/*IgnoreCaptured=*/true))) {
|
||||
Dir = dyn_cast<OMPExecutableDirective>(S);
|
||||
|
@ -1961,7 +1981,7 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction(
|
|||
llvm::SmallVector<const ValueDecl *, 4> LastPrivates;
|
||||
llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;
|
||||
if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) {
|
||||
getDistributeLastprivateVars(D, LastPrivates);
|
||||
getDistributeLastprivateVars(CGM.getContext(), D, LastPrivates);
|
||||
if (!LastPrivates.empty())
|
||||
GlobalizedRD = ::buildRecordForGlobalizedVars(
|
||||
CGM.getContext(), llvm::None, LastPrivates, MappedDeclsFields);
|
||||
|
|
|
@ -40,7 +40,7 @@ void foo() {
|
|||
for (int i = 0; i < 10; ++i)
|
||||
;
|
||||
int a;
|
||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 1)
|
||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
|
||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
|
||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
|
||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
|
||||
|
@ -76,17 +76,28 @@ int a;
|
|||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
|
||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
|
||||
#pragma omp target teams
|
||||
{
|
||||
int b;
|
||||
#pragma omp distribute parallel for simd
|
||||
for (int i = 0; i < 10; ++i)
|
||||
;
|
||||
;
|
||||
}
|
||||
#pragma omp target teams
|
||||
{
|
||||
int b[] = {2, 3, sizeof(int)};
|
||||
#pragma omp distribute parallel for simd schedule(static)
|
||||
for (int i = 0; i < 10; ++i)
|
||||
;
|
||||
}
|
||||
#pragma omp target teams
|
||||
{
|
||||
int b;
|
||||
#pragma omp distribute parallel for simd schedule(static, 1)
|
||||
for (int i = 0; i < 10; ++i)
|
||||
;
|
||||
int &c = b;
|
||||
}
|
||||
#pragma omp target teams
|
||||
#pragma omp distribute parallel for simd schedule(auto)
|
||||
for (int i = 0; i < 10; ++i)
|
||||
|
|
|
@ -59,7 +59,7 @@ int bar(int n){
|
|||
// CHECK: store i16* {{%.+}}, i16** [[AA_ADDR]], align
|
||||
// CHECK: [[AA:%.+]] = load i16*, i16** [[AA_ADDR]], align
|
||||
// CHECK: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
|
||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 1, i16 1)
|
||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 1, i16 0)
|
||||
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
|
||||
// CHECK: br label {{%?}}[[EXEC:.+]]
|
||||
//
|
||||
|
@ -102,7 +102,7 @@ int bar(int n){
|
|||
// CHECK: [[AA:%.+]] = load i16*, i16** [[AA_ADDR]], align
|
||||
// CHECK: [[B:%.+]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align
|
||||
// CHECK: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
|
||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 1, i16 1)
|
||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 1, i16 0)
|
||||
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
|
||||
// CHECK: br label {{%?}}[[EXEC:.+]]
|
||||
//
|
||||
|
|
|
@ -47,7 +47,7 @@ int bar(int n){
|
|||
}
|
||||
|
||||
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l22}}(
|
||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1)
|
||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 0)
|
||||
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
|
||||
// CHECK: br label {{%?}}[[EXEC:.+]]
|
||||
//
|
||||
|
@ -69,7 +69,7 @@ int bar(int n){
|
|||
|
||||
|
||||
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l26}}(
|
||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1)
|
||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 0)
|
||||
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
|
||||
// CHECK: br label {{%?}}[[EXEC:.+]]
|
||||
//
|
||||
|
@ -90,7 +90,7 @@ int bar(int n){
|
|||
|
||||
|
||||
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l31}}(
|
||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1)
|
||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 0)
|
||||
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
|
||||
// CHECK: br label {{%?}}[[EXEC:.+]]
|
||||
//
|
||||
|
|
|
@ -54,7 +54,7 @@ int bar(int n){
|
|||
|
||||
// CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l27}}(
|
||||
//
|
||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1)
|
||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 0)
|
||||
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
|
||||
// CHECK: br label {{%?}}[[EXECUTE:.+]]
|
||||
//
|
||||
|
@ -242,7 +242,7 @@ int bar(int n){
|
|||
|
||||
// CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l32}}(
|
||||
//
|
||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1)
|
||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 0)
|
||||
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
|
||||
// CHECK: br label {{%?}}[[EXECUTE:.+]]
|
||||
//
|
||||
|
@ -520,7 +520,7 @@ int bar(int n){
|
|||
|
||||
// CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l38}}(
|
||||
//
|
||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1)
|
||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 0)
|
||||
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
|
||||
// CHECK: br label {{%?}}[[EXECUTE:.+]]
|
||||
//
|
||||
|
|
|
@ -227,7 +227,7 @@ int bar(int n){
|
|||
// CHECK: ret void
|
||||
|
||||
// CHECK: define weak void @__omp_offloading_{{.*}}ftemplate{{.*}}_l37(
|
||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1)
|
||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 0)
|
||||
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
|
||||
// CHECK-NOT: call i8* @__kmpc_data_sharing_push_stack(
|
||||
// CHECK-NOT: call void @__kmpc_serialized_parallel(
|
||||
|
|
|
@ -75,7 +75,7 @@ int bar(int n){
|
|||
|
||||
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l32(
|
||||
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
|
||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 1)
|
||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
|
||||
// CHECK: call void @__kmpc_get_team_static_memory(i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i{{64|32}} 4, i16 1, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
|
||||
// CHECK: [[TEAM_ALLOC:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]],
|
||||
// CHECK: [[BC:%.+]] = bitcast i8* [[TEAM_ALLOC]] to [[REC:%.+]]*
|
||||
|
|
|
@ -70,7 +70,7 @@ int bar(int n){
|
|||
|
||||
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l30(
|
||||
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
|
||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 1)
|
||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
|
||||
// CHECK: call void @__kmpc_get_team_static_memory(i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i{{64|32}} 4, i16 1, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
|
||||
// CHECK: [[TEAM_ALLOC:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]],
|
||||
// CHECK: [[BC:%.+]] = bitcast i8* [[TEAM_ALLOC]] to [[REC:%.+]]*
|
||||
|
|
Loading…
Reference in New Issue