[OPENMP][NVPTX]Extend number of constructs executed in SPMD mode.

If the statements between target|teams|distribute directives does not
require execution in master thread, like constant expressions, null
statements, simple declarations, etc., such construct can be xecuted in
SPMD mode.

llvm-svn: 346551
This commit is contained in:
Alexey Bataev 2018-11-09 20:03:19 +00:00
parent c025f09ee4
commit 8bcc69c054
8 changed files with 88 additions and 57 deletions

View File

@ -698,12 +698,58 @@ getDataSharingMode(CodeGenModule &CGM) {
: CGOpenMPRuntimeNVPTX::Generic;
}
// Checks if the expression is constant or does not have non-trivial function
// calls.
static bool isTrivial(ASTContext &Ctx, const Expr * E) {
// We can skip constant expressions.
// We can skip expressions with trivial calls or simple expressions.
return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
!E->hasNonTrivialCall(Ctx)) &&
!E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}
/// Checks if the \p Body is the \a CompoundStmt and returns its child statement
/// iff there is only one.
static const Stmt *getSingleCompoundChild(const Stmt *Body) {
if (const auto *C = dyn_cast<CompoundStmt>(Body))
if (C->size() == 1)
return C->body_front();
/// iff there is only one that is not evaluatable at the compile time.
static const Stmt *getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body) {
if (const auto *C = dyn_cast<CompoundStmt>(Body)) {
const Stmt *Child = nullptr;
for (const Stmt *S : C->body()) {
if (const auto *E = dyn_cast<Expr>(S)) {
if (isTrivial(Ctx, E))
continue;
}
// Some of the statements can be ignored.
if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
continue;
// Analyze declarations.
if (const auto *DS = dyn_cast<DeclStmt>(S)) {
if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
isa<UsingDirectiveDecl>(D) ||
isa<OMPDeclareReductionDecl>(D) ||
isa<OMPThreadPrivateDecl>(D))
return true;
const auto *VD = dyn_cast<VarDecl>(D);
if (!VD)
return false;
return VD->isConstexpr() ||
((VD->getType().isTrivialType(Ctx) ||
VD->getType()->isReferenceType()) &&
(!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
}))
continue;
}
// Found multiple children - cannot get the one child only.
if (Child)
return Body;
Child = S;
}
if (Child)
return Child;
}
return Body;
}
@ -732,7 +778,7 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
const auto *CS = D.getInnermostCapturedStmt();
const auto *Body =
CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
const Stmt *ChildStmt = getSingleCompoundChild(Body);
const Stmt *ChildStmt = getSingleCompoundChild(Ctx, Body);
if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
@ -746,7 +792,7 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
/*IgnoreCaptured=*/true);
if (!Body)
return false;
ChildStmt = getSingleCompoundChild(Body);
ChildStmt = getSingleCompoundChild(Ctx, Body);
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
DKind = NND->getDirectiveKind();
if (isOpenMPParallelDirective(DKind) &&
@ -905,7 +951,7 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx,
const auto *CS = D.getInnermostCapturedStmt();
const auto *Body =
CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
const Stmt *ChildStmt = getSingleCompoundChild(Body);
const Stmt *ChildStmt = getSingleCompoundChild(Ctx, Body);
if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
@ -920,7 +966,7 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx,
/*IgnoreCaptured=*/true);
if (!Body)
return false;
ChildStmt = getSingleCompoundChild(Body);
ChildStmt = getSingleCompoundChild(Ctx, Body);
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
DKind = NND->getDirectiveKind();
if (isOpenMPWorksharingDirective(DKind) &&
@ -932,7 +978,7 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx,
/*IgnoreCaptured=*/true);
if (!Body)
return false;
ChildStmt = getSingleCompoundChild(Body);
ChildStmt = getSingleCompoundChild(Ctx, Body);
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
DKind = NND->getDirectiveKind();
if (isOpenMPParallelDirective(DKind) &&
@ -944,7 +990,7 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx,
/*IgnoreCaptured=*/true);
if (!Body)
return false;
ChildStmt = getSingleCompoundChild(Body);
ChildStmt = getSingleCompoundChild(Ctx, Body);
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
DKind = NND->getDirectiveKind();
if (isOpenMPWorksharingDirective(DKind) &&
@ -965,7 +1011,7 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx,
/*IgnoreCaptured=*/true);
if (!Body)
return false;
ChildStmt = getSingleCompoundChild(Body);
ChildStmt = getSingleCompoundChild(Ctx, Body);
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
DKind = NND->getDirectiveKind();
if (isOpenMPWorksharingDirective(DKind) &&
@ -1287,10 +1333,6 @@ void CGOpenMPRuntimeNVPTX::emitSPMDKernel(const OMPExecutableDirective &D,
IsInTTDRegion = false;
}
static void
getDistributeLastprivateVars(const OMPExecutableDirective &D,
llvm::SmallVectorImpl<const ValueDecl *> &Vars);
void CGOpenMPRuntimeNVPTX::emitSPMDEntryHeader(
CodeGenFunction &CGF, EntryFunctionState &EST,
const OMPExecutableDirective &D) {
@ -1303,33 +1345,10 @@ void CGOpenMPRuntimeNVPTX::emitSPMDEntryHeader(
// Initialize the OMP state in the runtime; called by all active threads.
bool RequiresFullRuntime = CGM.getLangOpts().OpenMPCUDAForceFullRuntime ||
!supportsLightweightRuntime(CGF.getContext(), D);
// Check if we have inner distribute + lastprivate|reduction clauses.
bool RequiresDatasharing = RequiresFullRuntime;
if (!RequiresDatasharing) {
const OMPExecutableDirective *TD = &D;
if (!isOpenMPTeamsDirective(TD->getDirectiveKind()) &&
!isOpenMPParallelDirective(TD->getDirectiveKind())) {
const Stmt *S = getSingleCompoundChild(
TD->getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(
/*IgnoreCaptured=*/true));
TD = cast<OMPExecutableDirective>(S);
}
if (!isOpenMPDistributeDirective(TD->getDirectiveKind()) &&
!isOpenMPParallelDirective(TD->getDirectiveKind())) {
const Stmt *S = getSingleCompoundChild(
TD->getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(
/*IgnoreCaptured=*/true));
TD = cast<OMPExecutableDirective>(S);
}
if (isOpenMPDistributeDirective(TD->getDirectiveKind()))
RequiresDatasharing = TD->hasClausesOfKind<OMPLastprivateClause>() ||
TD->hasClausesOfKind<OMPReductionClause>();
}
llvm::Value *Args[] = {
getThreadLimit(CGF, /*IsInSPMDExecutionMode=*/true),
/*RequiresOMPRuntime=*/
Bld.getInt16(RequiresFullRuntime ? 1 : 0),
/*RequiresDataSharing=*/Bld.getInt16(RequiresDatasharing ? 1 : 0)};
llvm::Value *Args[] = {getThreadLimit(CGF, /*IsInSPMDExecutionMode=*/true),
/*RequiresOMPRuntime=*/
Bld.getInt16(RequiresFullRuntime ? 1 : 0),
/*RequiresDataSharing=*/Bld.getInt16(0)};
CGF.EmitRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_init), Args);
@ -1928,13 +1947,14 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction(
/// Get list of lastprivate variables from the teams distribute ... or
/// teams {distribute ...} directives.
static void
getDistributeLastprivateVars(const OMPExecutableDirective &D,
getDistributeLastprivateVars(ASTContext &Ctx, const OMPExecutableDirective &D,
llvm::SmallVectorImpl<const ValueDecl *> &Vars) {
assert(isOpenMPTeamsDirective(D.getDirectiveKind()) &&
"expected teams directive.");
const OMPExecutableDirective *Dir = &D;
if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
if (const Stmt *S = getSingleCompoundChild(
Ctx,
D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(
/*IgnoreCaptured=*/true))) {
Dir = dyn_cast<OMPExecutableDirective>(S);
@ -1961,7 +1981,7 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction(
llvm::SmallVector<const ValueDecl *, 4> LastPrivates;
llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;
if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) {
getDistributeLastprivateVars(D, LastPrivates);
getDistributeLastprivateVars(CGM.getContext(), D, LastPrivates);
if (!LastPrivates.empty())
GlobalizedRD = ::buildRecordForGlobalizedVars(
CGM.getContext(), llvm::None, LastPrivates, MappedDeclsFields);

View File

@ -40,7 +40,7 @@ void foo() {
for (int i = 0; i < 10; ++i)
;
int a;
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 1)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
@ -76,17 +76,28 @@ int a;
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
#pragma omp target teams
{
int b;
#pragma omp distribute parallel for simd
for (int i = 0; i < 10; ++i)
;
;
}
#pragma omp target teams
{
int b[] = {2, 3, sizeof(int)};
#pragma omp distribute parallel for simd schedule(static)
for (int i = 0; i < 10; ++i)
;
}
#pragma omp target teams
{
int b;
#pragma omp distribute parallel for simd schedule(static, 1)
for (int i = 0; i < 10; ++i)
;
int &c = b;
}
#pragma omp target teams
#pragma omp distribute parallel for simd schedule(auto)
for (int i = 0; i < 10; ++i)

View File

@ -59,7 +59,7 @@ int bar(int n){
// CHECK: store i16* {{%.+}}, i16** [[AA_ADDR]], align
// CHECK: [[AA:%.+]] = load i16*, i16** [[AA_ADDR]], align
// CHECK: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 1, i16 1)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 1, i16 0)
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
// CHECK: br label {{%?}}[[EXEC:.+]]
//
@ -102,7 +102,7 @@ int bar(int n){
// CHECK: [[AA:%.+]] = load i16*, i16** [[AA_ADDR]], align
// CHECK: [[B:%.+]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align
// CHECK: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 1, i16 1)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 1, i16 0)
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
// CHECK: br label {{%?}}[[EXEC:.+]]
//

View File

@ -47,7 +47,7 @@ int bar(int n){
}
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l22}}(
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 0)
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
// CHECK: br label {{%?}}[[EXEC:.+]]
//
@ -69,7 +69,7 @@ int bar(int n){
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l26}}(
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 0)
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
// CHECK: br label {{%?}}[[EXEC:.+]]
//
@ -90,7 +90,7 @@ int bar(int n){
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l31}}(
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 0)
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
// CHECK: br label {{%?}}[[EXEC:.+]]
//

View File

@ -54,7 +54,7 @@ int bar(int n){
// CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l27}}(
//
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 0)
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
// CHECK: br label {{%?}}[[EXECUTE:.+]]
//
@ -242,7 +242,7 @@ int bar(int n){
// CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l32}}(
//
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 0)
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
// CHECK: br label {{%?}}[[EXECUTE:.+]]
//
@ -520,7 +520,7 @@ int bar(int n){
// CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l38}}(
//
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 0)
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
// CHECK: br label {{%?}}[[EXECUTE:.+]]
//

View File

@ -227,7 +227,7 @@ int bar(int n){
// CHECK: ret void
// CHECK: define weak void @__omp_offloading_{{.*}}ftemplate{{.*}}_l37(
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 0)
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
// CHECK-NOT: call i8* @__kmpc_data_sharing_push_stack(
// CHECK-NOT: call void @__kmpc_serialized_parallel(

View File

@ -75,7 +75,7 @@ int bar(int n){
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l32(
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 1)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
// CHECK: call void @__kmpc_get_team_static_memory(i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i{{64|32}} 4, i16 1, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
// CHECK: [[TEAM_ALLOC:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]],
// CHECK: [[BC:%.+]] = bitcast i8* [[TEAM_ALLOC]] to [[REC:%.+]]*

View File

@ -70,7 +70,7 @@ int bar(int n){
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l30(
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 1)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
// CHECK: call void @__kmpc_get_team_static_memory(i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i{{64|32}} 4, i16 1, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
// CHECK: [[TEAM_ALLOC:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]],
// CHECK: [[BC:%.+]] = bitcast i8* [[TEAM_ALLOC]] to [[REC:%.+]]*