forked from OSchip/llvm-project
[Flang][OpenMP] Upstream the lowering of the parallel do combined construct
When `parallel` is used in a combined construct, a separate function is used to create the parallel operation. It handles the parallel-specific clauses and leaves the remaining clauses to be handled when the inner operations are created. Reviewed By: peixin, shraiysh Differential Revision: https://reviews.llvm.org/D125465 Co-authored-by: Sourabh Singh Tomar <SourabhSingh.Tomar@amd.com> Co-authored-by: Eric Schweitz <eschweitz@nvidia.com> Co-authored-by: Valentin Clement <clementval@gmail.com> Co-authored-by: Nimish Mishra <neelam.nimish@gmail.com>
This commit is contained in:
parent
c153c61fad
commit
4202d69d9e
|
@ -278,6 +278,80 @@ genOMP(Fortran::lower::AbstractConverter &converter,
|
|||
standaloneConstruct.u);
|
||||
}
|
||||
|
||||
/* Convert a parsed PROC_BIND clause into the OpenMP dialect attribute.
 *
 * firOpBuilder   - builder whose context is used to create the attribute.
 * procBindClause - parsed clause; the enumerator stored in `v.v` selects
 *                  which omp::ClauseProcBindKind is produced.
 *
 * Returns the omp::ClauseProcBindKindAttr wrapping the selected kind.
 */
static omp::ClauseProcBindKindAttr genProcBindKindAttr(
    fir::FirOpBuilder &firOpBuilder,
    const Fortran::parser::OmpClause::ProcBind *procBindClause) {
  using ParsedKind = Fortran::parser::OmpProcBindClause::Type;

  // Each parser enumerator maps to the dialect enumerator of the same name.
  omp::ClauseProcBindKind dialectKind;
  switch (procBindClause->v.v) {
  case ParsedKind::Master:
    dialectKind = omp::ClauseProcBindKind::Master;
    break;
  case ParsedKind::Close:
    dialectKind = omp::ClauseProcBindKind::Close;
    break;
  case ParsedKind::Spread:
    dialectKind = omp::ClauseProcBindKind::Spread;
    break;
  case ParsedKind::Primary:
    dialectKind = omp::ClauseProcBindKind::Primary;
    break;
  }

  auto *context = firOpBuilder.getContext();
  return omp::ClauseProcBindKindAttr::get(context, dialectKind);
}
|
||||
|
||||
/* When parallel is used in a combined construct, then use this function to
|
||||
* create the parallel operation. It handles the parallel specific clauses
|
||||
* and leaves the rest for handling at the inner operations.
|
||||
* TODO: Refactor clause handling
|
||||
*/
|
||||
template <typename Directive>
|
||||
static void
|
||||
createCombinedParallelOp(Fortran::lower::AbstractConverter &converter,
|
||||
Fortran::lower::pft::Evaluation &eval,
|
||||
const Directive &directive) {
|
||||
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
|
||||
mlir::Location currentLocation = converter.getCurrentLocation();
|
||||
Fortran::lower::StatementContext stmtCtx;
|
||||
llvm::ArrayRef<mlir::Type> argTy;
|
||||
mlir::Value ifClauseOperand, numThreadsClauseOperand;
|
||||
SmallVector<Value> allocatorOperands, allocateOperands;
|
||||
mlir::omp::ClauseProcBindKindAttr procBindKindAttr;
|
||||
const auto &opClauseList =
|
||||
std::get<Fortran::parser::OmpClauseList>(directive.t);
|
||||
// TODO: Handle the following clauses
|
||||
// 1. default
|
||||
// 2. copyin
|
||||
// Note: rest of the clauses are handled when the inner operation is created
|
||||
for (const Fortran::parser::OmpClause &clause : opClauseList.v) {
|
||||
if (const auto &ifClause =
|
||||
std::get_if<Fortran::parser::OmpClause::If>(&clause.u)) {
|
||||
auto &expr = std::get<Fortran::parser::ScalarLogicalExpr>(ifClause->v.t);
|
||||
mlir::Value ifVal = fir::getBase(
|
||||
converter.genExprValue(*Fortran::semantics::GetExpr(expr), stmtCtx));
|
||||
ifClauseOperand = firOpBuilder.createConvert(
|
||||
currentLocation, firOpBuilder.getI1Type(), ifVal);
|
||||
} else if (const auto &numThreadsClause =
|
||||
std::get_if<Fortran::parser::OmpClause::NumThreads>(
|
||||
&clause.u)) {
|
||||
numThreadsClauseOperand = fir::getBase(converter.genExprValue(
|
||||
*Fortran::semantics::GetExpr(numThreadsClause->v), stmtCtx));
|
||||
} else if (const auto &procBindClause =
|
||||
std::get_if<Fortran::parser::OmpClause::ProcBind>(
|
||||
&clause.u)) {
|
||||
procBindKindAttr = genProcBindKindAttr(firOpBuilder, procBindClause);
|
||||
}
|
||||
}
|
||||
// Create and insert the operation.
|
||||
auto parallelOp = firOpBuilder.create<mlir::omp::ParallelOp>(
|
||||
currentLocation, argTy, ifClauseOperand, numThreadsClauseOperand,
|
||||
allocateOperands, allocatorOperands, /*reduction_vars=*/ValueRange(),
|
||||
/*reductions=*/nullptr, procBindKindAttr);
|
||||
|
||||
createBodyOfOp<omp::ParallelOp>(parallelOp, converter, currentLocation,
|
||||
&opClauseList, /*iv=*/{},
|
||||
/*isCombined=*/true);
|
||||
}
|
||||
|
||||
static void
|
||||
genOMP(Fortran::lower::AbstractConverter &converter,
|
||||
Fortran::lower::pft::Evaluation &eval,
|
||||
|
@ -318,23 +392,7 @@ genOMP(Fortran::lower::AbstractConverter &converter,
|
|||
} else if (const auto &procBindClause =
|
||||
std::get_if<Fortran::parser::OmpClause::ProcBind>(
|
||||
&clause.u)) {
|
||||
omp::ClauseProcBindKind pbKind;
|
||||
switch (procBindClause->v.v) {
|
||||
case Fortran::parser::OmpProcBindClause::Type::Master:
|
||||
pbKind = omp::ClauseProcBindKind::Master;
|
||||
break;
|
||||
case Fortran::parser::OmpProcBindClause::Type::Close:
|
||||
pbKind = omp::ClauseProcBindKind::Close;
|
||||
break;
|
||||
case Fortran::parser::OmpProcBindClause::Type::Spread:
|
||||
pbKind = omp::ClauseProcBindKind::Spread;
|
||||
break;
|
||||
case Fortran::parser::OmpProcBindClause::Type::Primary:
|
||||
pbKind = omp::ClauseProcBindKind::Primary;
|
||||
break;
|
||||
}
|
||||
procBindKindAttr =
|
||||
omp::ClauseProcBindKindAttr::get(firOpBuilder.getContext(), pbKind);
|
||||
procBindKindAttr = genProcBindKindAttr(firOpBuilder, procBindClause);
|
||||
} else if (const auto &allocateClause =
|
||||
std::get_if<Fortran::parser::OmpClause::Allocate>(
|
||||
&clause.u)) {
|
||||
|
@ -419,11 +477,17 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
|
|||
noWaitClauseOperand, orderedClauseOperand, orderClauseOperand;
|
||||
const auto &wsLoopOpClauseList = std::get<Fortran::parser::OmpClauseList>(
|
||||
std::get<Fortran::parser::OmpBeginLoopDirective>(loopConstruct.t).t);
|
||||
if (llvm::omp::OMPD_do !=
|
||||
|
||||
const auto ompDirective =
|
||||
std::get<Fortran::parser::OmpLoopDirective>(
|
||||
std::get<Fortran::parser::OmpBeginLoopDirective>(loopConstruct.t).t)
|
||||
.v) {
|
||||
TODO(converter.getCurrentLocation(), "Combined worksharing loop construct");
|
||||
.v;
|
||||
if (llvm::omp::OMPD_parallel_do == ompDirective) {
|
||||
createCombinedParallelOp<Fortran::parser::OmpBeginLoopDirective>(
|
||||
converter, eval,
|
||||
std::get<Fortran::parser::OmpBeginLoopDirective>(loopConstruct.t));
|
||||
} else if (llvm::omp::OMPD_do != ompDirective) {
|
||||
TODO(converter.getCurrentLocation(), "Construct enclosing do loop");
|
||||
}
|
||||
|
||||
int64_t collapseValue = Fortran::lower::getCollapseValue(wsLoopOpClauseList);
|
||||
|
@ -648,15 +712,14 @@ genOMP(Fortran::lower::AbstractConverter &converter,
|
|||
|
||||
// Parallel Sections Construct
|
||||
if (dir == llvm::omp::Directive::OMPD_parallel_sections) {
|
||||
auto parallelOp = firOpBuilder.create<mlir::omp::ParallelOp>(
|
||||
currentLocation, /*if_expr_var*/ nullptr, /*num_threads_var*/ nullptr,
|
||||
allocateOperands, allocatorOperands, /*reduction_vars=*/ValueRange(),
|
||||
/*reductions=*/nullptr, /*proc_bind_val*/ nullptr);
|
||||
createBodyOfOp(parallelOp, converter, currentLocation);
|
||||
createCombinedParallelOp<Fortran::parser::OmpBeginSectionsDirective>(
|
||||
converter, eval,
|
||||
std::get<Fortran::parser::OmpBeginSectionsDirective>(
|
||||
sectionsConstruct.t));
|
||||
auto sectionsOp = firOpBuilder.create<mlir::omp::SectionsOp>(
|
||||
currentLocation, /*reduction_vars*/ ValueRange(),
|
||||
/*reductions=*/nullptr, /*allocate_vars*/ ValueRange(),
|
||||
/*allocators_vars*/ ValueRange(), /*nowait=*/nullptr);
|
||||
/*reductions=*/nullptr, allocateOperands, allocatorOperands,
|
||||
/*nowait=*/nullptr);
|
||||
createBodyOfOp(sectionsOp, converter, currentLocation);
|
||||
|
||||
// Sections Construct
|
||||
|
|
|
@ -71,3 +71,36 @@ func.func @_QPsb2(%arg0: !fir.ref<i32> {fir.bindc_name = "x"}, %arg1: !fir.ref<i
|
|||
// CHECK: }
|
||||
// CHECK: llvm.return
|
||||
// CHECK: }
|
||||
|
||||
|
||||
// -----
|
||||
|
||||
// Test input: an omp.parallel region enclosing an omp.wsloop over 1..50
// (inclusive, step 1). Each iteration converts the induction variable to i64,
// subtracts 1 to form the index, and stores the induction variable into that
// element of the boxed array %arr.
func.func @_QPsb(%arr: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "arr"}) {
|
||||
%0 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsbEi"}
|
||||
omp.parallel {
|
||||
%c1 = arith.constant 1 : i32
|
||||
%c50 = arith.constant 50 : i32
|
||||
omp.wsloop for (%indx) : i32 = (%c1) to (%c50) inclusive step (%c1) {
|
||||
%1 = fir.convert %indx : (i32) -> i64
|
||||
%c1_i64 = arith.constant 1 : i64
|
||||
%2 = arith.subi %1, %c1_i64 : i64
|
||||
%3 = fir.coordinate_of %arr, %2 : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
|
||||
fir.store %indx to %3 : !fir.ref<i32>
|
||||
omp.yield
|
||||
}
|
||||
omp.terminator
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Check only for the structure of the OpenMP portion and the feasibility of the conversion
|
||||
// CHECK-LABEL: @_QPsb
|
||||
// CHECK-SAME: %{{.*}}: !llvm.ptr<struct<({{.*}})>> {fir.bindc_name = "arr"}
|
||||
// CHECK: omp.parallel {
|
||||
// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : i32) : i32
|
||||
// CHECK: %[[C50:.*]] = llvm.mlir.constant(50 : i32) : i32
|
||||
// CHECK: omp.wsloop for (%[[INDX:.*]]) : i32 = (%[[C1]]) to (%[[C50]]) inclusive step (%[[C1]]) {
|
||||
// CHECK: llvm.store %[[INDX]], %{{.*}} : !llvm.ptr<i32>
|
||||
// CHECK: omp.yield
|
||||
// CHECK: omp.terminator
|
||||
// CHECK: llvm.return
|
||||
|
|
|
@ -0,0 +1,96 @@
|
|||
! This test checks lowering of the OpenMP PARALLEL DO combined construct.
|
||||
|
||||
! RUN: bbc -fopenmp -emit-fir %s -o - | FileCheck %s
|
||||
|
||||
! Case: PARALLEL DO with no clauses. The CHECK lines expect an omp.parallel
! followed by an omp.wsloop over 1..9 (inclusive, step 1) whose induction
! variable is the value passed to the integer output call.
! CHECK-LABEL: func @_QPsimple_parallel_do()
|
||||
subroutine simple_parallel_do
|
||||
integer :: i
|
||||
! CHECK: omp.parallel
|
||||
! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32
|
||||
! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32
|
||||
! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32
|
||||
! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
|
||||
!$OMP PARALLEL DO
|
||||
do i=1, 9
|
||||
! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref<i8>, i32) -> i1
|
||||
print*, i
|
||||
|
||||
end do
|
||||
! CHECK: omp.yield
|
||||
! CHECK: omp.terminator
|
||||
!$OMP END PARALLEL DO
|
||||
end subroutine
|
||||
|
||||
! Case: PARALLEL DO with IF, NUM_THREADS and PROC_BIND. The CHECK lines expect
! the loaded condition (converted to i1), the loaded thread count and
! proc_bind(close) to appear on omp.parallel, with the omp.wsloop after it.
! CHECK-LABEL: func @_QPparallel_do_with_parallel_clauses
|
||||
! CHECK-SAME: %[[COND_REF:.*]]: !fir.ref<!fir.logical<4>> {fir.bindc_name = "cond"}, %[[NT_REF:.*]]: !fir.ref<i32> {fir.bindc_name = "nt"}
|
||||
subroutine parallel_do_with_parallel_clauses(cond, nt)
|
||||
logical :: cond
|
||||
integer :: nt
|
||||
integer :: i
|
||||
! CHECK: %[[COND:.*]] = fir.load %[[COND_REF]] : !fir.ref<!fir.logical<4>>
|
||||
! CHECK: %[[COND_CVT:.*]] = fir.convert %[[COND]] : (!fir.logical<4>) -> i1
|
||||
! CHECK: %[[NT:.*]] = fir.load %[[NT_REF]] : !fir.ref<i32>
|
||||
! CHECK: omp.parallel if(%[[COND_CVT]] : i1) num_threads(%[[NT]] : i32) proc_bind(close)
|
||||
! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32
|
||||
! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32
|
||||
! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32
|
||||
! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
|
||||
!$OMP PARALLEL DO IF(cond) NUM_THREADS(nt) PROC_BIND(close)
|
||||
do i=1, 9
|
||||
! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref<i8>, i32) -> i1
|
||||
print*, i
|
||||
|
||||
end do
|
||||
! CHECK: omp.yield
|
||||
! CHECK: omp.terminator
|
||||
!$OMP END PARALLEL DO
|
||||
end subroutine
|
||||
|
||||
! Case: PARALLEL DO mixing a parallel clause with a loop clause. The CHECK
! lines expect num_threads on omp.parallel and schedule(dynamic) on omp.wsloop.
! CHECK-LABEL: func @_QPparallel_do_with_clauses
|
||||
! CHECK-SAME: %[[NT_REF:.*]]: !fir.ref<i32> {fir.bindc_name = "nt"}
|
||||
subroutine parallel_do_with_clauses(nt)
|
||||
integer :: nt
|
||||
integer :: i
|
||||
! CHECK: %[[NT:.*]] = fir.load %[[NT_REF]] : !fir.ref<i32>
|
||||
! CHECK: omp.parallel num_threads(%[[NT]] : i32)
|
||||
! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32
|
||||
! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32
|
||||
! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32
|
||||
! CHECK: omp.wsloop schedule(dynamic) for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
|
||||
!$OMP PARALLEL DO NUM_THREADS(nt) SCHEDULE(dynamic)
|
||||
do i=1, 9
|
||||
! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref<i8>, i32) -> i1
|
||||
print*, i
|
||||
|
||||
end do
|
||||
! CHECK: omp.yield
|
||||
! CHECK: omp.terminator
|
||||
!$OMP END PARALLEL DO
|
||||
end subroutine
|
||||
|
||||
! Case: PARALLEL DO with PRIVATE(cond) and FIRSTPRIVATE(nt). The CHECK lines
! expect pinned allocas for both variables after omp.parallel, the original
! nt value loaded and stored into its private copy, and the loop body reading
! cond and nt from the private copies.
! CHECK-LABEL: func @_QPparallel_do_with_privatisation_clauses
|
||||
! CHECK-SAME: %[[COND_REF:.*]]: !fir.ref<!fir.logical<4>> {fir.bindc_name = "cond"}, %[[NT_REF:.*]]: !fir.ref<i32> {fir.bindc_name = "nt"}
|
||||
subroutine parallel_do_with_privatisation_clauses(cond,nt)
|
||||
logical :: cond
|
||||
integer :: nt
|
||||
integer :: i
|
||||
! CHECK: omp.parallel
|
||||
! CHECK: %[[PRIVATE_COND_REF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "cond", pinned, uniq_name = "_QFparallel_do_with_privatisation_clausesEcond"}
|
||||
! CHECK: %[[PRIVATE_NT_REF:.*]] = fir.alloca i32 {bindc_name = "nt", pinned, uniq_name = "_QFparallel_do_with_privatisation_clausesEnt"}
|
||||
! CHECK: %[[NT_VAL:.*]] = fir.load %[[NT_REF]] : !fir.ref<i32>
|
||||
! CHECK: fir.store %[[NT_VAL]] to %[[PRIVATE_NT_REF]] : !fir.ref<i32>
|
||||
! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32
|
||||
! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32
|
||||
! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32
|
||||
! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
|
||||
!$OMP PARALLEL DO PRIVATE(cond) FIRSTPRIVATE(nt)
|
||||
do i=1, 9
|
||||
! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref<i8>, i32) -> i1
|
||||
! CHECK: %[[PRIVATE_COND_VAL:.*]] = fir.load %[[PRIVATE_COND_REF]] : !fir.ref<!fir.logical<4>>
|
||||
! CHECK: %[[PRIVATE_COND_VAL_CVT:.*]] = fir.convert %[[PRIVATE_COND_VAL]] : (!fir.logical<4>) -> i1
|
||||
! CHECK: fir.call @_FortranAioOutputLogical({{.*}}, %[[PRIVATE_COND_VAL_CVT]]) : (!fir.ref<i8>, i1) -> i1
|
||||
! CHECK: %[[PRIVATE_NT_VAL:.*]] = fir.load %[[PRIVATE_NT_REF]] : !fir.ref<i32>
|
||||
! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[PRIVATE_NT_VAL]]) : (!fir.ref<i8>, i32) -> i1
|
||||
print*, i, cond, nt
|
||||
|
||||
end do
|
||||
! CHECK: omp.yield
|
||||
! CHECK: omp.terminator
|
||||
!$OMP END PARALLEL DO
|
||||
end subroutine
|
|
@ -40,8 +40,8 @@ subroutine omp_parallel_sections_allocate(x, y)
|
|||
integer, intent(inout) :: x, y
|
||||
!FIRDialect: %[[allocator:.*]] = arith.constant 1 : i32
|
||||
!LLVMDialect: %[[allocator:.*]] = llvm.mlir.constant(1 : i32) : i32
|
||||
!OMPDialect: omp.parallel allocate(%[[allocator]] : i32 -> %{{.*}} : !fir.ref<i32>) {
|
||||
!OMPDialect: omp.sections {
|
||||
!OMPDialect: omp.parallel {
|
||||
!OMPDialect: omp.sections allocate(%[[allocator]] : i32 -> %{{.*}} : !fir.ref<i32>) {
|
||||
!$omp parallel sections allocate(omp_high_bw_mem_alloc: x)
|
||||
!OMPDialect: omp.section {
|
||||
!$omp section
|
||||
|
|
Loading…
Reference in New Issue