[Flang][OpenMP] Add support for lastprivate clause for worksharing loop.

This patch adds initial support for the lastprivate clause on worksharing loops.  It creates the control flow necessary to guarantee that the value from the logically last iteration of the worksharing loop is stored.

Reviewed By: kiranchandramohan

Differential Revision: https://reviews.llvm.org/D130027
This commit is contained in:
Arnamoy Bhattacharyya 2022-07-25 20:31:23 -04:00
parent b2c53a9217
commit 17d9bdf460
4 changed files with 308 additions and 15 deletions

View File

@ -18,6 +18,7 @@
#include "flang/Optimizer/Builder/BoxValue.h" #include "flang/Optimizer/Builder/BoxValue.h"
#include "flang/Semantics/symbol.h" #include "flang/Semantics/symbol.h"
#include "mlir/IR/BuiltinOps.h" #include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/Operation.h"
#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/ArrayRef.h"
namespace fir { namespace fir {
@ -101,7 +102,8 @@ public:
virtual bool virtual bool
createHostAssociateVarClone(const Fortran::semantics::Symbol &sym) = 0; createHostAssociateVarClone(const Fortran::semantics::Symbol &sym) = 0;
virtual void copyHostAssociateVar(const Fortran::semantics::Symbol &sym) = 0; virtual void copyHostAssociateVar(const Fortran::semantics::Symbol &sym,
mlir::Block *lastPrivBlock = nullptr) = 0;
/// Collect the set of ultimate symbols of symbols with \p flag in \p eval /// Collect the set of ultimate symbols of symbols with \p flag in \p eval
/// region if \p isUltimateSymbol is true. Otherwise, collect the set of /// region if \p isUltimateSymbol is true. Otherwise, collect the set of

View File

@ -44,6 +44,7 @@
#include "flang/Runtime/iostat.h" #include "flang/Runtime/iostat.h"
#include "flang/Semantics/tools.h" #include "flang/Semantics/tools.h"
#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "mlir/IR/PatternMatch.h" #include "mlir/IR/PatternMatch.h"
#include "mlir/Parser/Parser.h" #include "mlir/Parser/Parser.h"
#include "mlir/Transforms/RegionUtils.h" #include "mlir/Transforms/RegionUtils.h"
@ -491,8 +492,10 @@ public:
return bindIfNewSymbol(sym, exv); return bindIfNewSymbol(sym, exv);
} }
// FIXME: Generalize this function, so that lastPrivBlock can be removed
void void
copyHostAssociateVar(const Fortran::semantics::Symbol &sym) override final { copyHostAssociateVar(const Fortran::semantics::Symbol &sym,
mlir::Block *lastPrivBlock = nullptr) override final {
// 1) Fetch the original copy of the variable. // 1) Fetch the original copy of the variable.
assert(sym.has<Fortran::semantics::HostAssocDetails>() && assert(sym.has<Fortran::semantics::HostAssocDetails>() &&
"No host-association found"); "No host-association found");
@ -509,22 +512,40 @@ public:
fir::ExtendedValue exv = getExtendedValue(sb); fir::ExtendedValue exv = getExtendedValue(sb);
// 3) Perform the assignment. // 3) Perform the assignment.
builder->setInsertionPointAfter(fir::getBase(exv).getDefiningOp()); mlir::OpBuilder::InsertPoint insPt = builder->saveInsertionPoint();
if (lastPrivBlock)
builder->setInsertionPointToStart(lastPrivBlock);
else
builder->setInsertionPointAfter(fir::getBase(exv).getDefiningOp());
fir::ExtendedValue lhs, rhs;
if (lastPrivBlock) {
// lastprivate case
lhs = hexv;
rhs = exv;
} else {
lhs = exv;
rhs = hexv;
}
mlir::Location loc = genLocation(sym.name()); mlir::Location loc = genLocation(sym.name());
mlir::Type symType = genType(sym); mlir::Type symType = genType(sym);
if (auto seqTy = symType.dyn_cast<fir::SequenceType>()) { if (auto seqTy = symType.dyn_cast<fir::SequenceType>()) {
Fortran::lower::StatementContext stmtCtx; Fortran::lower::StatementContext stmtCtx;
Fortran::lower::createSomeArrayAssignment(*this, exv, hexv, localSymbols, Fortran::lower::createSomeArrayAssignment(*this, lhs, rhs, localSymbols,
stmtCtx); stmtCtx);
stmtCtx.finalize(); stmtCtx.finalize();
} else if (hexv.getBoxOf<fir::CharBoxValue>()) { } else if (hexv.getBoxOf<fir::CharBoxValue>()) {
fir::factory::CharacterExprHelper{*builder, loc}.createAssign(exv, hexv); fir::factory::CharacterExprHelper{*builder, loc}.createAssign(lhs, rhs);
} else if (hexv.getBoxOf<fir::MutableBoxValue>()) { } else if (hexv.getBoxOf<fir::MutableBoxValue>()) {
TODO(loc, "firstprivatisation of allocatable variables"); TODO(loc, "firstprivatisation of allocatable variables");
} else { } else {
auto loadVal = builder->create<fir::LoadOp>(loc, fir::getBase(hexv)); auto loadVal = builder->create<fir::LoadOp>(loc, fir::getBase(rhs));
builder->create<fir::StoreOp>(loc, loadVal, fir::getBase(exv)); builder->create<fir::StoreOp>(loc, loadVal, fir::getBase(lhs));
} }
if (lastPrivBlock)
builder->restoreInsertionPoint(insPt);
} }
//===--------------------------------------------------------------------===// //===--------------------------------------------------------------------===//

View File

@ -22,6 +22,7 @@
#include "flang/Parser/parse-tree.h" #include "flang/Parser/parse-tree.h"
#include "flang/Semantics/tools.h" #include "flang/Semantics/tools.h"
#include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/Frontend/OpenMP/OMPConstants.h"
using namespace mlir; using namespace mlir;
@ -61,7 +62,8 @@ getOmpObjectSymbol(const Fortran::parser::OmpObject &ompObject) {
template <typename T> template <typename T>
static void createPrivateVarSyms(Fortran::lower::AbstractConverter &converter, static void createPrivateVarSyms(Fortran::lower::AbstractConverter &converter,
const T *clause) { const T *clause,
Block *lastPrivBlock = nullptr) {
const Fortran::parser::OmpObjectList &ompObjectList = clause->v; const Fortran::parser::OmpObjectList &ompObjectList = clause->v;
for (const Fortran::parser::OmpObject &ompObject : ompObjectList.v) { for (const Fortran::parser::OmpObject &ompObject : ompObjectList.v) {
Fortran::semantics::Symbol *sym = getOmpObjectSymbol(ompObject); Fortran::semantics::Symbol *sym = getOmpObjectSymbol(ompObject);
@ -74,16 +76,25 @@ static void createPrivateVarSyms(Fortran::lower::AbstractConverter &converter,
assert(success && "Privatization failed due to existing binding"); assert(success && "Privatization failed due to existing binding");
if constexpr (std::is_same_v<T, Fortran::parser::OmpClause::Firstprivate>) { if constexpr (std::is_same_v<T, Fortran::parser::OmpClause::Firstprivate>) {
converter.copyHostAssociateVar(*sym); converter.copyHostAssociateVar(*sym);
} else if constexpr (std::is_same_v<
T, Fortran::parser::OmpClause::Lastprivate>) {
converter.copyHostAssociateVar(*sym, lastPrivBlock);
} }
} }
} }
static void privatizeVars(Fortran::lower::AbstractConverter &converter, template <typename Op>
static bool privatizeVars(Op &op, Fortran::lower::AbstractConverter &converter,
const Fortran::parser::OmpClauseList &opClauseList) { const Fortran::parser::OmpClauseList &opClauseList) {
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
auto insPt = firOpBuilder.saveInsertionPoint(); auto insPt = firOpBuilder.saveInsertionPoint();
firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock()); firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock());
bool hasFirstPrivateOp = false; bool hasFirstPrivateOp = false;
bool hasLastPrivateOp = false;
Block *lastPrivBlock = nullptr;
// We need just one ICmpOp for multiple LastPrivate clauses.
mlir::arith::CmpIOp cmpOp;
for (const Fortran::parser::OmpClause &clause : opClauseList.v) { for (const Fortran::parser::OmpClause &clause : opClauseList.v) {
if (const auto &privateClause = if (const auto &privateClause =
std::get_if<Fortran::parser::OmpClause::Private>(&clause.u)) { std::get_if<Fortran::parser::OmpClause::Private>(&clause.u)) {
@ -93,11 +104,73 @@ static void privatizeVars(Fortran::lower::AbstractConverter &converter,
&clause.u)) { &clause.u)) {
createPrivateVarSyms(converter, firstPrivateClause); createPrivateVarSyms(converter, firstPrivateClause);
hasFirstPrivateOp = true; hasFirstPrivateOp = true;
} else if (const auto &lastPrivateClause =
std::get_if<Fortran::parser::OmpClause::Lastprivate>(
&clause.u)) {
// TODO: Add lastprivate support for sections construct, simd construct
if (std::is_same_v<Op, omp::WsLoopOp>) {
omp::WsLoopOp *wsLoopOp = dyn_cast<omp::WsLoopOp>(&op);
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
auto insPt = firOpBuilder.saveInsertionPoint();
// Our goal here is to introduce the following control flow
// just before exiting the worksharing loop.
// Say our wsloop is as follows:
//
// omp.wsloop {
// ...
// store
// omp.yield
// }
//
// We want to convert it to the following:
//
// omp.wsloop {
// ...
// store
// %cmp = llvm.icmp "eq" %iv %ub
// scf.if %cmp {
// ^%lpv_update_blk:
// }
// omp.yield
// }
Operation *lastOper = wsLoopOp->region().back().getTerminator();
firOpBuilder.setInsertionPoint(lastOper);
// TODO: The following will not work when there is collapse present.
// Have to modify this in future.
for (const Fortran::parser::OmpClause &clause : opClauseList.v)
if (const auto &collapseClause =
std::get_if<Fortran::parser::OmpClause::Collapse>(&clause.u))
TODO(converter.getCurrentLocation(),
"Collapse clause with lastprivate");
// Only generate the compare once in presence of multiple LastPrivate
// clauses
if (!hasLastPrivateOp) {
cmpOp = firOpBuilder.create<mlir::arith::CmpIOp>(
wsLoopOp->getLoc(), mlir::arith::CmpIPredicate::eq,
wsLoopOp->getRegion().front().getArguments()[0],
wsLoopOp->upperBound()[0]);
}
mlir::scf::IfOp ifOp = firOpBuilder.create<mlir::scf::IfOp>(
wsLoopOp->getLoc(), cmpOp, /*else*/ false);
firOpBuilder.restoreInsertionPoint(insPt);
createPrivateVarSyms(converter, lastPrivateClause,
&(ifOp.getThenRegion().front()));
} else {
TODO(converter.getCurrentLocation(),
"lastprivate clause in constructs other than work-share loop");
}
hasLastPrivateOp = true;
} }
} }
if (hasFirstPrivateOp) if (hasFirstPrivateOp)
firOpBuilder.create<mlir::omp::BarrierOp>(converter.getCurrentLocation()); firOpBuilder.create<mlir::omp::BarrierOp>(converter.getCurrentLocation());
firOpBuilder.restoreInsertionPoint(insPt); firOpBuilder.restoreInsertionPoint(insPt);
return hasLastPrivateOp;
} }
/// The COMMON block is a global structure. \p commonValue is the base address /// The COMMON block is a global structure. \p commonValue is the base address
@ -300,6 +373,14 @@ void createEmptyRegionBlocks(
} }
} }
/// Reposition the insertion point of \p firOpBuilder.
///
/// \param [in,out] firOpBuilder - the builder whose insertion point is moved.
/// \param [in] storeOp - when non-null, the insertion point is placed
///                       immediately after this operation.
/// \param [in] block - used only when \p storeOp is null; the insertion
///                     point is then placed at the start of this block.
void resetBeforeTerminator(fir::FirOpBuilder &firOpBuilder,
                           mlir::Operation *storeOp, mlir::Block &block) {
  // No anchoring operation available: fall back to the top of the block.
  if (!storeOp) {
    firOpBuilder.setInsertionPointToStart(&block);
    return;
  }
  firOpBuilder.setInsertionPointAfter(storeOp);
}
/// Create the body (block) for an OpenMP Operation. /// Create the body (block) for an OpenMP Operation.
/// ///
/// \param [in] op - the operation the body belongs to. /// \param [in] op - the operation the body belongs to.
@ -374,14 +455,18 @@ createBodyOfOp(Op &op, Fortran::lower::AbstractConverter &converter,
} }
// Reset the insert point to before the terminator. // Reset the insert point to before the terminator.
if (storeOp) resetBeforeTerminator(firOpBuilder, storeOp, block);
firOpBuilder.setInsertionPointAfter(storeOp);
else
firOpBuilder.setInsertionPointToStart(&block);
// Handle privatization. Do not privatize if this is the outer operation. // Handle privatization. Do not privatize if this is the outer operation.
if (clauses && !outerCombined) if (clauses && !outerCombined) {
privatizeVars(converter, *clauses); bool lastPrivateOp = privatizeVars(op, converter, *clauses);
// LastPrivatization, due to introduction of
// new control flow, changes the insertion point,
// thus restore it.
// TODO: Clean up later a bit to avoid this many sets and resets.
if (lastPrivateOp)
resetBeforeTerminator(firOpBuilder, storeOp, block);
}
if constexpr (std::is_same_v<Op, omp::ParallelOp>) { if constexpr (std::is_same_v<Op, omp::ParallelOp>) {
threadPrivatizeVars(converter, eval); threadPrivatizeVars(converter, eval);

View File

@ -0,0 +1,185 @@
! This test checks lowering of `LASTPRIVATE` clause for scalar types.
! TODO: Add a test for same var being first and lastprivate when support is there.
! RUN: bbc -fopenmp -emit-fir %s -o - | FileCheck %s
! RUN: flang-new -fc1 -fopenmp -emit-fir %s -o - | FileCheck %s
!CHECK: func @_QPlastprivate_character(%[[ARG1:.*]]: !fir.boxchar<1>{{.*}}) {
!CHECK-DAG: %[[ARG1_UNBOX:.*]]:2 = fir.unboxchar
!CHECK-DAG: %[[FIVE:.*]] = arith.constant 5 : index
!CHECK: omp.parallel {
!CHECK-DAG: %[[ARG1_PVT:.*]] = fir.alloca !fir.char<1,5> {bindc_name = "arg1",
! Check that we are accessing the clone inside the loop
!CHECK-DAG: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} {
!CHECK-DAG: %[[NEG_ONE:.*]] = arith.constant -1 : i32
!CHECK-NEXT: %[[ADDR:.*]] = fir.address_of(@_QQcl.
!CHECK-NEXT: %[[CVT0:.*]] = fir.convert %[[ADDR]]
!CHECK-NEXT: %[[CNST:.*]] = arith.constant
!CHECK-NEXT: %[[CALL_BEGIN_IO:.*]] = fir.call @_FortranAioBeginExternalListOutput(%[[NEG_ONE]], %[[CVT0]], %[[CNST]]) : (i32, !fir.ref<i8>, i32) -> !fir.ref<i8>
!CHECK-NEXT: %[[CVT_0_1:.*]] = fir.convert %[[ARG1_PVT]]
!CHECK-NEXT: %[[CVT_0_2:.*]] = fir.convert %[[FIVE]]
!CHECK-NEXT: %[[CALL_OP_ASCII:.*]] = fir.call @_FortranAioOutputAscii(%[[CALL_BEGIN_IO]], %[[CVT_0_1]], %[[CVT_0_2]])
!CHECK-NEXT: %[[CALL_END_IO:.*]] = fir.call @_FortranAioEndIoStatement(%[[CALL_BEGIN_IO]])
! Testing last iteration check
!CHECK-NEXT: %[[IV_CMP:.*]] = arith.cmpi eq, %[[INDX_WS]]
!CHECK: scf.if %[[IV_CMP]] {
! Testing lastprivate val update
!CHECK-DAG: %[[CVT:.*]] = fir.convert %[[ARG1_UNBOX]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8>
!CHECK-DAG: %[[CVT1:.*]] = fir.convert %[[ARG1_PVT]] : (!fir.ref<!fir.char<1,5>>) -> !fir.ref<i8>
!CHECK-DAG: fir.call @llvm.memmove.p0.p0.i64(%[[CVT]], %[[CVT1]]{{.*}})
!CHECK: %[[THIRTY_TWO:.*]] = arith.constant 32 : i8
!CHECK-DAG: %[[UNDEF:.*]] = fir.undefined !fir.char<1>
!CHECK-DAG: %[[INSERT:.*]] = fir.insert_value %[[UNDEF]], %[[THIRTY_TWO]], [0 : index] : (!fir.char<1>, i8) -> !fir.char<1>
!CHECK-DAG: %[[ONE_3:.*]] = arith.constant 1 : index
!CHECK: fir.do_loop %[[ARG2:.*]] = {{.*}} {
!CHECK-DAG: %[[CVT_2:.*]] = fir.convert %[[ARG1_UNBOX]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<?x!fir.char<1>>>
!CHECK-DAG: %[[COORD:.*]] = fir.coordinate_of %[[CVT_2]], %[[ARG2]] : (!fir.ref<!fir.array<?x!fir.char<1>>>, index) -> !fir.ref<!fir.char<1>>
!CHECK-DAG: fir.store %[[INSERT]] to %[[COORD]] : !fir.ref<!fir.char<1>>
!CHECK-DAG: }
!CHECK-DAG: }
!CHECK-DAG: omp.yield
! Lowering input: a CHARACTER(5) dummy argument named in a LASTPRIVATE clause
! on a worksharing DO nested inside a PARALLEL region.
subroutine lastprivate_character(arg1)
character(5) :: arg1
!$OMP PARALLEL
!$OMP DO LASTPRIVATE(arg1)
do n = 1, 5
! Each iteration overwrites the n-th character of arg1, then prints arg1.
arg1(n:n) = 'c'
print *, arg1
end do
!$OMP END DO
!$OMP END PARALLEL
end subroutine
!CHECK: func @_QPlastprivate_int(%[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "arg1"}) {
!CHECK-DAG: omp.parallel {
!CHECK-DAG: %[[CLONE:.*]] = fir.alloca i32 {bindc_name = "arg1"
!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} {
! Testing last iteration check
!CHECK-DAG: %[[IV_CMP:.*]] = arith.cmpi eq, %[[INDX_WS]]
!CHECK-DAG: scf.if %[[IV_CMP]] {
! Testing lastprivate val update
!CHECK-NEXT: %[[CLONE_LD:.*]] = fir.load %[[CLONE]] : !fir.ref<i32>
!CHECK-NEXT: fir.store %[[CLONE_LD]] to %[[ARG1]] : !fir.ref<i32>
!CHECK-DAG: }
!CHECK-DAG: omp.yield
! Lowering input: a single INTEGER dummy argument named in a LASTPRIVATE
! clause on a worksharing DO nested inside a PARALLEL region.
subroutine lastprivate_int(arg1)
integer :: arg1
!$OMP PARALLEL
!$OMP DO LASTPRIVATE(arg1)
do n = 1, 5
arg1 = 2
print *, arg1
end do
!$OMP END DO
!$OMP END PARALLEL
! arg1 is read again after the parallel region.
print *, arg1
end subroutine
!CHECK: func.func @_QPmult_lastprivate_int(%[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "arg1"}, %[[ARG2:.*]]: !fir.ref<i32> {fir.bindc_name = "arg2"}) {
!CHECK-DAG: omp.parallel {
!CHECK-DAG: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "arg1"
!CHECK-DAG: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2"
!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} {
! Testing last iteration check
!CHECK-DAG: %[[IV_CMP1:.*]] = arith.cmpi eq, %[[INDX_WS]]
!CHECK-DAG: scf.if %[[IV_CMP1]] {
! Testing lastprivate val update
!CHECK-NEXT: %[[CLONE_LD1:.*]] = fir.load %[[CLONE1]] : !fir.ref<i32>
!CHECK-NEXT: fir.store %[[CLONE_LD1]] to %[[ARG1]] : !fir.ref<i32>
!CHECK-DAG: }
!CHECK-DAG: scf.if %[[IV_CMP1]] {
! Testing lastprivate val update
!CHECK-NEXT: %[[CLONE_LD2:.*]] = fir.load %[[CLONE2]] : !fir.ref<i32>
!CHECK-NEXT: fir.store %[[CLONE_LD2]] to %[[ARG2]] : !fir.ref<i32>
!CHECK-DAG: }
!CHECK-DAG: omp.yield
! Lowering input: two INTEGER dummy arguments, each named in its own
! LASTPRIVATE clause on the same worksharing DO directive.
subroutine mult_lastprivate_int(arg1, arg2)
integer :: arg1, arg2
!$OMP PARALLEL
!$OMP DO LASTPRIVATE(arg1) LASTPRIVATE(arg2)
do n = 1, 5
arg1 = 2
arg2 = 3
print *, arg1, arg2
end do
!$OMP END DO
!$OMP END PARALLEL
! Both variables are read again after the parallel region.
print *, arg1, arg2
end subroutine
!CHECK: func.func @_QPmult_lastprivate_int2(%[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "arg1"}, %[[ARG2:.*]]: !fir.ref<i32> {fir.bindc_name = "arg2"}) {
!CHECK-DAG: omp.parallel {
!CHECK-DAG: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "arg1"
!CHECK-DAG: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2"
!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} {
! Testing last iteration check
!CHECK-DAG: %[[IV_CMP1:.*]] = arith.cmpi eq, %[[INDX_WS]]
!CHECK-DAG: scf.if %[[IV_CMP1]] {
! Testing lastprivate val update
!CHECK-NEXT: %[[CLONE_LD2:.*]] = fir.load %[[CLONE2]] : !fir.ref<i32>
!CHECK-NEXT: fir.store %[[CLONE_LD2]] to %[[ARG2]] : !fir.ref<i32>
!CHECK-NEXT: %[[CLONE_LD1:.*]] = fir.load %[[CLONE1]] : !fir.ref<i32>
!CHECK-NEXT: fir.store %[[CLONE_LD1]] to %[[ARG1]] : !fir.ref<i32>
!CHECK-NEXT: }
!CHECK-NEXT: omp.yield
! Lowering input: two INTEGER dummy arguments listed together in a single
! LASTPRIVATE clause on a worksharing DO directive.
subroutine mult_lastprivate_int2(arg1, arg2)
integer :: arg1, arg2
!$OMP PARALLEL
!$OMP DO LASTPRIVATE(arg1, arg2)
do n = 1, 5
arg1 = 2
arg2 = 3
print *, arg1, arg2
end do
!$OMP END DO
!$OMP END PARALLEL
! Both variables are read again after the parallel region.
print *, arg1, arg2
end subroutine
!CHECK: func.func @_QPfirstpriv_lastpriv_int(%[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "arg1"}, %[[ARG2:.*]]: !fir.ref<i32> {fir.bindc_name = "arg2"}) {
!CHECK-DAG: omp.parallel {
!CHECK-DAG: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "arg1"
! Firstprivate update
!CHECK-NEXT: %[[FPV_LD:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
!CHECK-NEXT: fir.store %[[FPV_LD]] to %[[CLONE1]] : !fir.ref<i32>
! Lastprivate Allocation
!CHECK-NEXT: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2"
!CHECK-NEXT: omp.barrier
!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} {
! Testing last iteration check
!CHECK-DAG: %[[IV_CMP1:.*]] = arith.cmpi eq, %[[INDX_WS]]
!CHECK-DAG: scf.if %[[IV_CMP1]] {
! Testing lastprivate val update
!CHECK-NEXT: %[[CLONE_LD:.*]] = fir.load %[[CLONE2]] : !fir.ref<i32>
!CHECK-NEXT: fir.store %[[CLONE_LD]] to %[[ARG2]] : !fir.ref<i32>
!CHECK-NEXT: }
!CHECK-NEXT: omp.yield
! Lowering input: FIRSTPRIVATE and LASTPRIVATE clauses on the same worksharing
! DO directive, applied to two different INTEGER dummy arguments
! (arg1 is FIRSTPRIVATE, arg2 is LASTPRIVATE).
subroutine firstpriv_lastpriv_int(arg1, arg2)
integer :: arg1, arg2
!$OMP PARALLEL
!$OMP DO FIRSTPRIVATE(arg1) LASTPRIVATE(arg2)
do n = 1, 5
arg1 = 2
arg2 = 3
print *, arg1, arg2
end do
!$OMP END DO
!$OMP END PARALLEL
! Both variables are read again after the parallel region.
print *, arg1, arg2
end subroutine