forked from OSchip/llvm-project
[OPENMP] Initial codegen for 'parallel sections' directive.
Emits code for the outlined 'parallel' directive with the implicitly inlined 'sections' directive:

...
call __kmpc_fork_call(..., outlined_function, ...);
...

define internal void outlined_function(...) {
  <code for implicit sections directive>;
}

Differential Revision: http://reviews.llvm.org/D8997
llvm-svn: 234849
This commit is contained in:
parent
87afdeb8f5
commit
68adb7da1a
|
@ -883,8 +883,8 @@ static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
|
|||
return LVal;
|
||||
}
|
||||
|
||||
void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
|
||||
LexicalScope Scope(*this, S.getSourceRange());
|
||||
static OpenMPDirectiveKind emitSections(CodeGenFunction &CGF,
|
||||
const OMPExecutableDirective &S) {
|
||||
auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
|
||||
auto *CS = dyn_cast<CompoundStmt>(Stmt);
|
||||
if (CS && CS->size() > 1) {
|
||||
|
@ -904,9 +904,9 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
|
|||
// Loop counter.
|
||||
LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
|
||||
OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
|
||||
OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
|
||||
CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
|
||||
OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
|
||||
OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
|
||||
CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
|
||||
// Generate condition for loop.
|
||||
BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
|
||||
OK_Ordinary, S.getLocStart(),
|
||||
|
@ -959,26 +959,27 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
|
|||
OMPC_SCHEDULE_static);
|
||||
};
|
||||
|
||||
CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
|
||||
} else {
|
||||
// If only one section is found - no need to generate loop, emit as a
|
||||
// single
|
||||
// region.
|
||||
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
|
||||
CGF.EmitStmt(
|
||||
cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
|
||||
CGF.EnsureInsertPoint();
|
||||
};
|
||||
CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
|
||||
llvm::None, llvm::None, llvm::None,
|
||||
llvm::None);
|
||||
CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, CodeGen);
|
||||
return OMPD_sections;
|
||||
}
|
||||
// If only one section is found - no need to generate loop, emit as a single
|
||||
// region.
|
||||
auto &&CodeGen = [Stmt](CodeGenFunction &CGF) {
|
||||
CGF.EmitStmt(Stmt);
|
||||
CGF.EnsureInsertPoint();
|
||||
};
|
||||
CGF.CGM.getOpenMPRuntime().emitSingleRegion(CGF, CodeGen, S.getLocStart(),
|
||||
llvm::None, llvm::None,
|
||||
llvm::None, llvm::None);
|
||||
return OMPD_single;
|
||||
}
|
||||
|
||||
void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
|
||||
LexicalScope Scope(*this, S.getSourceRange());
|
||||
OpenMPDirectiveKind EmittedAs = emitSections(*this, S);
|
||||
// Emit an implicit barrier at the end.
|
||||
if (!S.getSingleClause(OMPC_nowait)) {
|
||||
CGM.getOpenMPRuntime().emitBarrierCall(
|
||||
*this, S.getLocStart(),
|
||||
(CS && CS->size() > 1) ? OMPD_sections : OMPD_single);
|
||||
CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), EmittedAs);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1071,8 +1072,17 @@ void CodeGenFunction::EmitOMPParallelForSimdDirective(
|
|||
}
|
||||
|
||||
void CodeGenFunction::EmitOMPParallelSectionsDirective(
|
||||
const OMPParallelSectionsDirective &) {
|
||||
llvm_unreachable("CodeGen for 'omp parallel sections' is not supported yet.");
|
||||
const OMPParallelSectionsDirective &S) {
|
||||
// Emit directive as a combined directive that consists of two implicit
|
||||
// directives: 'parallel' with 'sections' directive.
|
||||
LexicalScope Scope(*this, S.getSourceRange());
|
||||
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
|
||||
(void)emitSections(CGF, S);
|
||||
// Emit implicit barrier at the end of parallel region.
|
||||
CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
|
||||
OMPD_parallel);
|
||||
};
|
||||
emitCommonOMPParallelDirective(*this, S, CodeGen);
|
||||
}
|
||||
|
||||
void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
|
||||
|
|
|
@ -2085,15 +2085,16 @@ public:
|
|||
void EmitOMPTargetDirective(const OMPTargetDirective &S);
|
||||
void EmitOMPTeamsDirective(const OMPTeamsDirective &S);
|
||||
|
||||
void
|
||||
EmitOMPInnerLoop(const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
|
||||
const Expr *IncExpr,
|
||||
const llvm::function_ref<void(CodeGenFunction &)> &BodyGen);
|
||||
|
||||
private:
|
||||
|
||||
/// Helpers for the OpenMP loop directives.
|
||||
void EmitOMPLoopBody(const OMPLoopDirective &Directive,
|
||||
bool SeparateIter = false);
|
||||
void
|
||||
EmitOMPInnerLoop(const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
|
||||
const Expr *IncExpr,
|
||||
const llvm::function_ref<void(CodeGenFunction &)> &BodyGen);
|
||||
void EmitOMPSimdFinal(const OMPLoopDirective &S);
|
||||
void EmitOMPWorksharingLoop(const OMPLoopDirective &S);
|
||||
void EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
|
||||
|
|
|
@ -1117,7 +1117,11 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
|
|||
break;
|
||||
}
|
||||
case OMPD_parallel_sections: {
|
||||
QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1);
|
||||
QualType KmpInt32PtrTy = Context.getPointerType(KmpInt32Ty);
|
||||
Sema::CapturedParamNameType Params[] = {
|
||||
std::make_pair(".global_tid.", KmpInt32PtrTy),
|
||||
std::make_pair(".bound_tid.", KmpInt32PtrTy),
|
||||
std::make_pair(StringRef(), QualType()) // __context with shared vars
|
||||
};
|
||||
ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
|
||||
|
|
|
@ -0,0 +1,98 @@
|
|||
// RUN: %clang_cc1 -verify -fopenmp=libiomp5 -x c++ -emit-llvm -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -o - %s | FileCheck %s
|
||||
// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -std=c++11 -fexceptions -fcxx-exceptions -triple x86_64-unknown-unknown -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-llvm -o - | FileCheck %s
|
||||
// expected-no-diagnostics
|
||||
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
// CHECK: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8*
|
||||
// CHECK-LABEL: foo
|
||||
// Empty function called from the 'parallel sections' regions in tmain and main.
void foo() {}
|
||||
// CHECK-LABEL: bar
|
||||
// Empty function called from the second '#pragma omp section' in main.
void bar() {}
|
||||
|
||||
// Instantiated as tmain<int>() from main. The 'parallel sections' region here
// holds a single statement with no '#pragma omp section' split; the tmain
// FileCheck patterns at the end of the file expect the foo() call to be
// wrapped in __kmpc_single / __kmpc_end_single. Returns a value-initialized T.
template <class T>
|
||||
T tmain() {
|
||||
#pragma omp parallel sections
|
||||
{
|
||||
foo();
|
||||
}
|
||||
return T();
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @main
|
||||
// Exercises a 'parallel sections' region containing two '#pragma omp section'
// parts (calling foo() and bar()), then returns tmain<int>(). The interleaved
// FileCheck patterns expect a __kmpc_fork_call of an outlined function whose
// body calls __kmpc_for_static_init_4, loops over an induction variable with
// a switch that selects one case per section, and finishes with
// __kmpc_for_static_fini and __kmpc_cancel_barrier. The patterns are matched
// in order, so the position of each directive relative to the code matters.
int main() {
|
||||
// CHECK: call void (%{{.+}}*, i32, void (i32*, i32*, ...)*, ...)* @__kmpc_fork_call(%{{.+}}* @{{.+}}, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %{{.+}}*)* [[OMP_PARALLEL_FUNC:@.+]] to void (i32*, i32*, ...)*), i8* %{{.+}})
|
||||
// CHECK-LABEL: }
|
||||
// CHECK: define internal void [[OMP_PARALLEL_FUNC]](i32* [[GTID_PARAM_ADDR:%.+]], i32* %{{.+}}, %{{.+}}* %{{.+}})
|
||||
// CHECK: store i32* [[GTID_PARAM_ADDR]], i32** [[GTID_REF_ADDR:%.+]],
|
||||
#pragma omp parallel sections
|
||||
{
|
||||
// CHECK: store i32 0, i32* [[LB_PTR:%.+]],
|
||||
// CHECK: store i32 1, i32* [[UB_PTR:%.+]],
|
||||
// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
|
||||
// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
|
||||
// CHECK: call void @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 [[GTID]], i32 34, i32* [[IS_LAST_PTR:%.+]], i32* [[LB_PTR]], i32* [[UB_PTR]], i32* [[STRIDE_PTR:%.+]], i32 1, i32 1)
|
||||
// <<UB = min(UB, GlobalUB);>>
|
||||
// CHECK: [[UB:%.+]] = load i32, i32* [[UB_PTR]]
|
||||
// CHECK: [[CMP:%.+]] = icmp slt i32 [[UB]], 1
|
||||
// CHECK: [[MIN_UB_GLOBALUB:%.+]] = select i1 [[CMP]], i32 [[UB]], i32 1
|
||||
// CHECK: store i32 [[MIN_UB_GLOBALUB]], i32* [[UB_PTR]]
|
||||
// <<IV = LB;>>
|
||||
// CHECK: [[LB:%.+]] = load i32, i32* [[LB_PTR]]
|
||||
// CHECK: store i32 [[LB]], i32* [[IV_PTR:%.+]]
|
||||
// CHECK: br label %[[INNER_FOR_COND:.+]]
|
||||
// CHECK: [[INNER_FOR_COND]]
|
||||
// <<IV <= UB?>>
|
||||
// CHECK: [[IV:%.+]] = load i32, i32* [[IV_PTR]]
|
||||
// CHECK: [[UB:%.+]] = load i32, i32* [[UB_PTR]]
|
||||
// CHECK: [[CMP:%.+]] = icmp sle i32 [[IV]], [[UB]]
|
||||
// CHECK: br i1 [[CMP]], label %[[INNER_LOOP_BODY:.+]], label %[[INNER_LOOP_END:.+]]
|
||||
// CHECK: [[INNER_LOOP_BODY]]
|
||||
// <<TRUE>> -> <BODY>
|
||||
// CHECK: [[IV:%.+]] = load i32, i32* [[IV_PTR]]
|
||||
// CHECK: switch i32 [[IV]], label %[[SECTIONS_EXIT:.+]] [
|
||||
// CHECK-NEXT: i32 0, label %[[SECTIONS_CASE0:.+]]
|
||||
// CHECK-NEXT: i32 1, label %[[SECTIONS_CASE1:.+]]
|
||||
#pragma omp section
|
||||
// CHECK: [[SECTIONS_CASE0]]
|
||||
// CHECK-NEXT: invoke void @{{.*}}foo{{.*}}()
|
||||
// CHECK: br label %[[SECTIONS_EXIT]]
|
||||
foo();
|
||||
#pragma omp section
|
||||
// CHECK: [[SECTIONS_CASE1]]
|
||||
// CHECK-NEXT: invoke void @{{.*}}bar{{.*}}()
|
||||
// CHECK: br label %[[SECTIONS_EXIT]]
|
||||
bar();
|
||||
// CHECK: [[SECTIONS_EXIT]]
|
||||
// <<++IV;>>
|
||||
// CHECK: [[IV:%.+]] = load i32, i32* [[IV_PTR]]
|
||||
// CHECK-NEXT: [[INC:%.+]] = add nsw i32 [[IV]], 1
|
||||
// CHECK-NEXT: store i32 [[INC]], i32* [[IV_PTR]]
|
||||
// CHECK-NEXT: br label %[[INNER_FOR_COND]]
|
||||
// CHECK: [[INNER_LOOP_END]]
|
||||
}
|
||||
// CHECK: call void @__kmpc_for_static_fini(%{{.+}}* @{{.+}}, i32 [[GTID]])
|
||||
// CHECK: call i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]],
|
||||
return tmain<int>();
|
||||
}
|
||||
|
||||
// CHECK-LABEL: tmain
|
||||
// CHECK: call void {{.*}} @__kmpc_fork_call(
|
||||
// CHECK-NOT: __kmpc_global_thread_num
|
||||
// CHECK: [[RES:%.+]] = call i32 @__kmpc_single(
|
||||
// CHECK-NEXT: [[BOOLRES:%.+]] = icmp ne i32 [[RES]], 0
|
||||
// CHECK-NEXT: br i1 [[BOOLRES]], label %[[THEN:.+]], label %[[END:.+]]
|
||||
// CHECK: [[THEN]]
|
||||
// CHECK-NEXT: invoke void @{{.*}}foo{{.*}}()
|
||||
// CHECK-NEXT: unwind label %[[TERM_LPAD:.+]]
|
||||
// CHECK: call void @__kmpc_end_single(
|
||||
// CHECK-NEXT: br label %[[END]]
|
||||
// CHECK: [[END]]
|
||||
// CHECK-NEXT: call i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]],
|
||||
// CHECK-NEXT: ret
|
||||
// CHECK: [[TERM_LPAD]]
|
||||
// CHECK: call void @__clang_call_terminate(i8*
|
||||
// CHECK-NEXT: unreachable
|
||||
|
||||
#endif
|
Loading…
Reference in New Issue