forked from OSchip/llvm-project
[OPENMP, NVPTX] Do not use SPMD mode for target simd and target teams
distribute simd directives. Directives `target simd` and `target teams distribute simd` must be executed in non-SPMD mode. llvm-svn: 332129
This commit is contained in:
parent
304d0d5560
commit
df093e7b45
|
@ -628,9 +628,8 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
|
|||
OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
|
||||
switch (D.getDirectiveKind()) {
|
||||
case OMPD_target:
|
||||
if ((isOpenMPParallelDirective(DKind) &&
|
||||
!hasParallelIfClause(Ctx, *NestedDir)) ||
|
||||
isOpenMPSimdDirective(DKind))
|
||||
if (isOpenMPParallelDirective(DKind) &&
|
||||
!hasParallelIfClause(Ctx, *NestedDir))
|
||||
return true;
|
||||
if (DKind == OMPD_teams || DKind == OMPD_teams_distribute) {
|
||||
Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
|
||||
|
@ -639,9 +638,8 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
|
|||
ChildStmt = getSingleCompoundChild(Body);
|
||||
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
|
||||
DKind = NND->getDirectiveKind();
|
||||
if ((isOpenMPParallelDirective(DKind) &&
|
||||
!hasParallelIfClause(Ctx, *NND)) ||
|
||||
isOpenMPSimdDirective(DKind))
|
||||
if (isOpenMPParallelDirective(DKind) &&
|
||||
!hasParallelIfClause(Ctx, *NND))
|
||||
return true;
|
||||
if (DKind == OMPD_distribute) {
|
||||
Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
|
||||
|
@ -652,18 +650,16 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
|
|||
return false;
|
||||
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
|
||||
DKind = NND->getDirectiveKind();
|
||||
return (isOpenMPParallelDirective(DKind) &&
|
||||
!hasParallelIfClause(Ctx, *NND)) ||
|
||||
isOpenMPSimdDirective(DKind);
|
||||
return isOpenMPParallelDirective(DKind) &&
|
||||
!hasParallelIfClause(Ctx, *NND);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
case OMPD_target_teams:
|
||||
if ((isOpenMPParallelDirective(DKind) &&
|
||||
!hasParallelIfClause(Ctx, *NestedDir)) ||
|
||||
isOpenMPSimdDirective(DKind))
|
||||
if (isOpenMPParallelDirective(DKind) &&
|
||||
!hasParallelIfClause(Ctx, *NestedDir))
|
||||
return true;
|
||||
if (DKind == OMPD_distribute) {
|
||||
Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
|
||||
|
@ -672,16 +668,14 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
|
|||
ChildStmt = getSingleCompoundChild(Body);
|
||||
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
|
||||
DKind = NND->getDirectiveKind();
|
||||
return (isOpenMPParallelDirective(DKind) &&
|
||||
!hasParallelIfClause(Ctx, *NND)) ||
|
||||
isOpenMPSimdDirective(DKind);
|
||||
return isOpenMPParallelDirective(DKind) &&
|
||||
!hasParallelIfClause(Ctx, *NND);
|
||||
}
|
||||
}
|
||||
return false;
|
||||
case OMPD_target_teams_distribute:
|
||||
return (isOpenMPParallelDirective(DKind) &&
|
||||
!hasParallelIfClause(Ctx, *NestedDir)) ||
|
||||
isOpenMPSimdDirective(DKind);
|
||||
return isOpenMPParallelDirective(DKind) &&
|
||||
!hasParallelIfClause(Ctx, *NestedDir);
|
||||
case OMPD_target_simd:
|
||||
case OMPD_target_parallel:
|
||||
case OMPD_target_parallel_for:
|
||||
|
@ -755,7 +749,7 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx,
|
|||
return !hasParallelIfClause(Ctx, D);
|
||||
case OMPD_target_simd:
|
||||
case OMPD_target_teams_distribute_simd:
|
||||
return true;
|
||||
return false;
|
||||
case OMPD_parallel:
|
||||
case OMPD_for:
|
||||
case OMPD_parallel_for:
|
||||
|
|
|
@ -8,11 +8,11 @@
|
|||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
// Check that the execution mode of all 4 target regions on the GPU is set to SPMD Mode.
|
||||
// CHECK-DAG: {{@__omp_offloading_.+l25}}_exec_mode = weak constant i8 0
|
||||
// CHECK-DAG: {{@__omp_offloading_.+l30}}_exec_mode = weak constant i8 0
|
||||
// CHECK-DAG: {{@__omp_offloading_.+l35}}_exec_mode = weak constant i8 0
|
||||
// CHECK-DAG: {{@__omp_offloading_.+l40}}_exec_mode = weak constant i8 0
|
||||
// Check that the execution mode of all 4 target regions on the GPU is set to NonSPMD Mode.
|
||||
// CHECK-DAG: {{@__omp_offloading_.+l25}}_exec_mode = weak constant i8 1
|
||||
// CHECK-DAG: {{@__omp_offloading_.+l30}}_exec_mode = weak constant i8 1
|
||||
// CHECK-DAG: {{@__omp_offloading_.+l35}}_exec_mode = weak constant i8 1
|
||||
// CHECK-DAG: {{@__omp_offloading_.+l40}}_exec_mode = weak constant i8 1
|
||||
|
||||
#define N 1000
|
||||
|
||||
|
@ -54,38 +54,34 @@ int bar(int n){
|
|||
}
|
||||
|
||||
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l25}}(
|
||||
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
|
||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
|
||||
// CHECK: call void @__kmpc_kernel_init(i32 %{{.+}}, i16 1)
|
||||
// CHECK-NOT: call void @__kmpc_for_static_init
|
||||
// CHECK-NOT: call void @__kmpc_for_static_fini
|
||||
// CHECK: call void @__kmpc_spmd_kernel_deinit()
|
||||
// CHECK: call void @__kmpc_kernel_deinit(i16 1)
|
||||
// CHECK: ret void
|
||||
|
||||
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l30}}(
|
||||
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
|
||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
|
||||
// CHECK: call void @__kmpc_kernel_init(i32 %{{.+}}, i16 1)
|
||||
// CHECK-NOT: call void @__kmpc_for_static_init
|
||||
// CHECK-NOT: call void @__kmpc_for_static_fini
|
||||
// CHECK: call void @__kmpc_spmd_kernel_deinit()
|
||||
// CHECK: call void @__kmpc_kernel_deinit(i16 1)
|
||||
// CHECK: ret void
|
||||
|
||||
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l35}}(
|
||||
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
|
||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
|
||||
// CHECK: call void @__kmpc_kernel_init(i32 %{{.+}}, i16 1)
|
||||
// CHECK-NOT: call void @__kmpc_for_static_init
|
||||
// CHECK-NOT: call void @__kmpc_for_static_fini
|
||||
// CHECK: call void @__kmpc_spmd_kernel_deinit()
|
||||
// CHECK: call void @__kmpc_kernel_deinit(i16 1)
|
||||
// CHECK: ret void
|
||||
|
||||
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l40}}(
|
||||
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
|
||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
|
||||
// CHECK: call void @__kmpc_kernel_init(i32 %{{.+}}, i16 1)
|
||||
// CHECK-NOT: call void @__kmpc_for_static_init
|
||||
// CHECK-NOT: call void @__kmpc_for_static_fini
|
||||
// CHECK: [[RES:%.+]] = call i32 @__kmpc_nvptx_simd_reduce_nowait(i32 %{{.+}}, i32 1, i{{64|32}} {{8|4}}, i8* %{{.+}}, void (i8*, i16, i16, i16)* @{{.+}}, void (i8*, i32)* @{{.+}})
|
||||
// CHECK: switch i32 [[RES]]
|
||||
// CHECK: call void @__kmpc_nvptx_end_reduce_nowait(i32 %{{.+}})
|
||||
// CHECK: call void @__kmpc_spmd_kernel_deinit()
|
||||
// CHECK: call void @__kmpc_kernel_deinit(i16 1)
|
||||
// CHECK: ret void
|
||||
|
||||
|
||||
|
|
|
@ -8,11 +8,11 @@
|
|||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
// Check that the execution mode of all 4 target regions on the GPU is set to SPMD Mode.
|
||||
// CHECK-DAG: {{@__omp_offloading_.+l30}}_exec_mode = weak constant i8 0
|
||||
// CHECK-DAG: {{@__omp_offloading_.+l36}}_exec_mode = weak constant i8 0
|
||||
// CHECK-DAG: {{@__omp_offloading_.+l41}}_exec_mode = weak constant i8 0
|
||||
// CHECK-DAG: {{@__omp_offloading_.+l46}}_exec_mode = weak constant i8 0
|
||||
// Check that the execution mode of all 4 target regions on the GPU is set to NonSPMD Mode.
|
||||
// CHECK-DAG: {{@__omp_offloading_.+l30}}_exec_mode = weak constant i8 1
|
||||
// CHECK-DAG: {{@__omp_offloading_.+l36}}_exec_mode = weak constant i8 1
|
||||
// CHECK-DAG: {{@__omp_offloading_.+l41}}_exec_mode = weak constant i8 1
|
||||
// CHECK-DAG: {{@__omp_offloading_.+l46}}_exec_mode = weak constant i8 1
|
||||
|
||||
#define N 1000
|
||||
#define M 10
|
||||
|
@ -62,38 +62,34 @@ int bar(int n){
|
|||
return a;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
|
||||
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
|
||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
|
||||
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l30(
|
||||
// CHECK: call void @__kmpc_kernel_init(i32 %{{.+}}, i16 1)
|
||||
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
// CHECK: call void @__kmpc_spmd_kernel_deinit()
|
||||
// CHECK: call void @__kmpc_kernel_deinit(i16 1)
|
||||
// CHECK: ret void
|
||||
|
||||
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
|
||||
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
|
||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
|
||||
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l36(
|
||||
// CHECK: call void @__kmpc_kernel_init(i32 %{{.+}}, i16 1)
|
||||
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
// CHECK: call void @__kmpc_spmd_kernel_deinit()
|
||||
// CHECK: call void @__kmpc_kernel_deinit(i16 1)
|
||||
// CHECK: ret void
|
||||
|
||||
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
|
||||
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
|
||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
|
||||
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l41(
|
||||
// CHECK: call void @__kmpc_kernel_init(i32 %{{.+}}, i16 1)
|
||||
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
// CHECK: call void @__kmpc_spmd_kernel_deinit()
|
||||
// CHECK: call void @__kmpc_kernel_deinit(i16 1)
|
||||
// CHECK: ret void
|
||||
|
||||
// CHECK: define {{.*}}void {{@__omp_offloading_.+}}({{.+}}, i{{32|64}} [[F_IN:%.+]])
|
||||
// CHECK: define {{.*}}void {{@__omp_offloading_.+}}_l46({{.+}}, i{{32|64}} [[F_IN:%.+]])
|
||||
// CHECK: store {{.+}} [[F_IN]], {{.+}}* {{.+}},
|
||||
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
|
||||
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
|
||||
// CHECK: call void @__kmpc_kernel_init(i32 %{{.+}}, i16 1)
|
||||
// CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align
|
||||
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]],
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
// CHECK: call void @__kmpc_spmd_kernel_deinit()
|
||||
// CHECK: call void @__kmpc_kernel_deinit(i16 1)
|
||||
// CHECK: ret void
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue