forked from OSchip/llvm-project
[flang][OpenMP] Lowering support for atomic update construct
This patch adds lowering support for the atomic update construct. Every `omp.atomic.update` operation has an associated region that contains: (1) the evaluation of the expression on the RHS of the atomic assignment statement, and (2) an `omp.yield` operation that yields the extended value of the expression evaluated in (1). Reviewed By: peixin. Differential Revision: https://reviews.llvm.org/D125668
This commit is contained in:
parent
9e6e631b38
commit
a56b76d9ca
|
@ -1113,6 +1113,68 @@ static void genOmpAtomicHintAndMemoryOrderClauses(
|
|||
}
|
||||
}
|
||||
|
||||
/// Lower an atomic assignment statement `variable = expr` to an
/// `omp.atomic.update` operation.
///
/// The operation takes the address of `assignmentStmtVariable` as its operand
/// and owns a region with a single block. That block has one argument whose
/// type is the type of the variable's value; the variable's symbol is bound to
/// the argument before `assignmentStmtExpr` is lowered inside the block. The
/// block is terminated by an `omp.yield` of the lowered expression.
///
/// \param converter lowering bridge used to build operations and bind symbols.
/// \param eval not used by this function.
/// \param assignmentStmtVariable left-hand side of the assignment statement.
/// \param assignmentStmtExpr right-hand side of the assignment statement.
/// \param leftHandClauseList clause list to take hint / memory-order clauses
///        from; may be null.
/// \param rightHandClauseList second clause list to take hint / memory-order
///        clauses from; may be null.
static void genOmpAtomicUpdateStatement(
    Fortran::lower::AbstractConverter &converter,
    Fortran::lower::pft::Evaluation &eval,
    const Fortran::parser::Variable &assignmentStmtVariable,
    const Fortran::parser::Expr &assignmentStmtExpr,
    const Fortran::parser::OmpAtomicClauseList *leftHandClauseList,
    const Fortran::parser::OmpAtomicClauseList *rightHandClauseList) {
  // Generate `omp.atomic.update` operation for atomic assignment statements
  auto &firOpBuilder = converter.getFirOpBuilder();
  auto currentLocation = converter.getCurrentLocation();
  Fortran::lower::StatementContext stmtCtx;

  // Address of the variable being updated: the operand of the operation.
  mlir::Value address = fir::getBase(converter.genExprAddr(
      *Fortran::semantics::GetExpr(assignmentStmtVariable), stmtCtx));

  // Find the symbol of the updated variable. It stays null when the variable
  // is not a designator, or when the designator does not resolve to a name.
  const Fortran::semantics::Symbol *updateSymbol = nullptr;
  if (auto varDesignator = std::get_if<
          Fortran::common::Indirection<Fortran::parser::Designator>>(
          &assignmentStmtVariable.u)) {
    if (const auto *name = getDesignatorNameIfDataRef(varDesignator->value()))
      updateSymbol = name->symbol;
  }
  // The region argument must be bound to a symbol below, so a variable
  // without one cannot be lowered here. Report it instead of dereferencing a
  // null (previously: uninitialized) pointer.
  if (!updateSymbol) {
    TODO(currentLocation,
         "Atomic update of a variable that is not a named data reference");
    // Defensive: never reach the dereference should TODO ever return.
    return;
  }

  // If no hint clause is specified, the effect is as if
  // hint(omp_sync_hint_none) had been specified.
  mlir::IntegerAttr hint = nullptr;
  mlir::omp::ClauseMemoryOrderKindAttr memory_order = nullptr;
  // Both clause lists feed the same `hint` / `memory_order` attributes.
  if (leftHandClauseList)
    genOmpAtomicHintAndMemoryOrderClauses(converter, *leftHandClauseList, hint,
                                          memory_order);
  if (rightHandClauseList)
    genOmpAtomicHintAndMemoryOrderClauses(converter, *rightHandClauseList, hint,
                                          memory_order);
  auto atomicUpdateOp = firOpBuilder.create<mlir::omp::AtomicUpdateOp>(
      currentLocation, address, hint, memory_order);

  //// Generate body of Atomic Update operation
  // The block argument has the type of the variable's value. The variable is
  // lowered as a value here only to obtain that type.
  mlir::Type varType =
      fir::getBase(
          converter.genExprValue(
              *Fortran::semantics::GetExpr(assignmentStmtVariable), stmtCtx))
          .getType();
  SmallVector<Type> varTys = {varType};
  SmallVector<Location> locs = {currentLocation};
  firOpBuilder.createBlock(&atomicUpdateOp.getRegion(), {}, varTys, locs);

  // Bind the variable's symbol to the block argument, so that the expression
  // lowered below refers to the argument.
  mlir::Value val =
      fir::getBase(atomicUpdateOp.getRegion().front().getArgument(0));
  converter.bindSymbol(*updateSymbol, val);

  // Lower the right-hand side at the end of the block.
  mlir::Block &block = atomicUpdateOp.getRegion().back();
  firOpBuilder.setInsertionPointToEnd(&block);

  mlir::Value result = fir::getBase(converter.genExprValue(
      *Fortran::semantics::GetExpr(assignmentStmtExpr), stmtCtx));
  // Insert the terminator: YieldOp.
  firOpBuilder.create<mlir::omp::YieldOp>(currentLocation, result);
  // Leave the insertion point at the start of the block.
  firOpBuilder.setInsertionPointToStart(&block);
}
|
||||
|
||||
static void
|
||||
genOmpAtomicWrite(Fortran::lower::AbstractConverter &converter,
|
||||
Fortran::lower::pft::Evaluation &eval,
|
||||
|
@ -1176,6 +1238,43 @@ static void genOmpAtomicRead(Fortran::lower::AbstractConverter &converter,
|
|||
to_address, hint, memory_order);
|
||||
}
|
||||
|
||||
static void
|
||||
genOmpAtomicUpdate(Fortran::lower::AbstractConverter &converter,
|
||||
Fortran::lower::pft::Evaluation &eval,
|
||||
const Fortran::parser::OmpAtomicUpdate &atomicUpdate) {
|
||||
const Fortran::parser::OmpAtomicClauseList &rightHandClauseList =
|
||||
std::get<2>(atomicUpdate.t);
|
||||
const Fortran::parser::OmpAtomicClauseList &leftHandClauseList =
|
||||
std::get<0>(atomicUpdate.t);
|
||||
const auto &assignmentStmtExpr =
|
||||
std::get<Fortran::parser::Expr>(std::get<3>(atomicUpdate.t).statement.t);
|
||||
const auto &assignmentStmtVariable = std::get<Fortran::parser::Variable>(
|
||||
std::get<3>(atomicUpdate.t).statement.t);
|
||||
|
||||
genOmpAtomicUpdateStatement(converter, eval, assignmentStmtVariable,
|
||||
assignmentStmtExpr, &leftHandClauseList,
|
||||
&rightHandClauseList);
|
||||
}
|
||||
|
||||
/// Lower an `OmpAtomic` parse node, i.e. an atomic construct that carries a
/// single clause list and an assignment statement.
///
/// The statement's variable and expression are forwarded, with the clause
/// list, to genOmpAtomicUpdateStatement; no second clause list is passed.
static void genOmpAtomic(Fortran::lower::AbstractConverter &converter,
                         Fortran::lower::pft::Evaluation &eval,
                         const Fortran::parser::OmpAtomic &atomicConstruct) {
  using AssignmentStatement =
      Fortran::parser::Statement<Fortran::parser::AssignmentStmt>;

  // Tuple of the assignment statement: holds the updated variable and the
  // expression assigned to it.
  const auto &assignment =
      std::get<AssignmentStatement>(atomicConstruct.t).statement.t;
  const auto &updatedVariable =
      std::get<Fortran::parser::Variable>(assignment);
  const auto &updateExpr = std::get<Fortran::parser::Expr>(assignment);

  const Fortran::parser::OmpAtomicClauseList &clauses =
      std::get<Fortran::parser::OmpAtomicClauseList>(atomicConstruct.t);

  // If atomic-clause is not present on the construct, the behaviour is as if
  // the update clause is specified
  genOmpAtomicUpdateStatement(converter, eval, updatedVariable, updateExpr,
                              &clauses, nullptr);
}
|
||||
|
||||
static void
|
||||
genOMP(Fortran::lower::AbstractConverter &converter,
|
||||
Fortran::lower::pft::Evaluation &eval,
|
||||
|
@ -1187,9 +1286,14 @@ genOMP(Fortran::lower::AbstractConverter &converter,
|
|||
[&](const Fortran::parser::OmpAtomicWrite &atomicWrite) {
|
||||
genOmpAtomicWrite(converter, eval, atomicWrite);
|
||||
},
|
||||
[&](const Fortran::parser::OmpAtomic &atomicConstruct) {
|
||||
genOmpAtomic(converter, eval, atomicConstruct);
|
||||
},
|
||||
[&](const Fortran::parser::OmpAtomicUpdate &atomicUpdate) {
|
||||
genOmpAtomicUpdate(converter, eval, atomicUpdate);
|
||||
},
|
||||
[&](const auto &) {
|
||||
TODO(converter.getCurrentLocation(),
|
||||
"Atomic update & capture");
|
||||
TODO(converter.getCurrentLocation(), "Atomic capture");
|
||||
},
|
||||
},
|
||||
atomicConstruct.u);
|
||||
|
|
|
@ -0,0 +1,124 @@
|
|||
! This test checks lowering of atomic and atomic update constructs
|
||||
! RUN: bbc -fopenmp -emit-fir %s -o - | FileCheck %s
|
||||
! RUN: %flang_fc1 -emit-fir -fopenmp %s -o - | FileCheck %s
|
||||
|
||||
program OmpAtomicUpdate
|
||||
use omp_lib
|
||||
integer :: x, y, z
|
||||
integer, pointer :: a, b
|
||||
integer, target :: c, d
|
||||
a=>c
|
||||
b=>d
|
||||
|
||||
!CHECK: func.func @_QQmain() {
|
||||
!CHECK: %[[A:.*]] = fir.alloca !fir.box<!fir.ptr<i32>> {bindc_name = "a", uniq_name = "_QFEa"}
|
||||
!CHECK: %[[A_ADDR:.*]] = fir.alloca !fir.ptr<i32> {uniq_name = "_QFEa.addr"}
|
||||
!CHECK: %{{.*}} = fir.zero_bits !fir.ptr<i32>
|
||||
!CHECK: fir.store %{{.*}} to %[[A_ADDR]] : !fir.ref<!fir.ptr<i32>>
|
||||
!CHECK: %[[B:.*]] = fir.alloca !fir.box<!fir.ptr<i32>> {bindc_name = "b", uniq_name = "_QFEb"}
|
||||
!CHECK: %[[B_ADDR:.*]] = fir.alloca !fir.ptr<i32> {uniq_name = "_QFEb.addr"}
|
||||
!CHECK: %{{.*}} = fir.zero_bits !fir.ptr<i32>
|
||||
!CHECK: fir.store %{{.*}} to %[[B_ADDR]] : !fir.ref<!fir.ptr<i32>>
|
||||
!CHECK: %[[C_ADDR:.*]] = fir.address_of(@_QFEc) : !fir.ref<i32>
|
||||
!CHECK: %[[D_ADDR:.*]] = fir.address_of(@_QFEd) : !fir.ref<i32>
|
||||
!CHECK: %[[X:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFEx"}
|
||||
!CHECK: %[[Y:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFEy"}
|
||||
!CHECK: %[[Z:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFEz"}
|
||||
!CHECK: %{{.*}} = fir.convert %[[C_ADDR]] : (!fir.ref<i32>) -> !fir.ptr<i32>
|
||||
!CHECK: fir.store %{{.*}} to %[[A_ADDR]] : !fir.ref<!fir.ptr<i32>>
|
||||
!CHECK: %{{.*}} = fir.convert %[[D_ADDR]] : (!fir.ref<i32>) -> !fir.ptr<i32>
|
||||
!CHECK: fir.store {{.*}} to %[[B_ADDR]] : !fir.ref<!fir.ptr<i32>>
|
||||
!CHECK: %[[LOADED_A:.*]] = fir.load %[[A_ADDR]] : !fir.ref<!fir.ptr<i32>>
|
||||
!CHECK: omp.atomic.update %[[LOADED_A]] : !fir.ptr<i32> {
|
||||
!CHECK: ^bb0(%[[ARG:.*]]: i32):
|
||||
!CHECK: %[[LOADED_B:.*]] = fir.load %[[B_ADDR]] : !fir.ref<!fir.ptr<i32>>
|
||||
!CHECK: %{{.*}} = fir.load %[[LOADED_B]] : !fir.ptr<i32>
|
||||
!CHECK: %[[RESULT:.*]] = arith.addi %[[ARG]], %{{.*}} : i32
|
||||
!CHECK: omp.yield(%[[RESULT]] : i32)
|
||||
!CHECK: }
|
||||
!$omp atomic update
|
||||
a = a + b
|
||||
|
||||
!CHECK: omp.atomic.update %[[Y]] : !fir.ref<i32> {
|
||||
!CHECK: ^bb0(%[[ARG:.*]]: i32):
|
||||
!CHECK: {{.*}} = arith.constant 1 : i32
|
||||
!CHECK: %[[RESULT:.*]] = arith.addi %[[ARG]], {{.*}} : i32
|
||||
!CHECK: omp.yield(%[[RESULT]] : i32)
|
||||
!CHECK: }
|
||||
!CHECK: omp.atomic.update %[[Z]] : !fir.ref<i32> {
|
||||
!CHECK: ^bb0(%[[ARG:.*]]: i32):
|
||||
!CHECK: %[[LOADED_X:.*]] = fir.load %[[X]] : !fir.ref<i32>
|
||||
!CHECK: %[[RESULT:.*]] = arith.muli %[[LOADED_X]], %[[ARG]] : i32
|
||||
!CHECK: omp.yield(%[[RESULT]] : i32)
|
||||
!CHECK: }
|
||||
!$omp atomic
|
||||
y = y + 1
|
||||
!$omp atomic update
|
||||
z = x * z
|
||||
|
||||
!CHECK: omp.atomic.update memory_order(relaxed) hint(uncontended) %[[X]] : !fir.ref<i32> {
|
||||
!CHECK: ^bb0(%[[ARG:.*]]: i32):
|
||||
!CHECK: %{{.*}} = arith.constant 1 : i32
|
||||
!CHECK: %[[RESULT:.*]] = arith.subi %[[ARG]], {{.*}} : i32
|
||||
!CHECK: omp.yield(%[[RESULT]] : i32)
|
||||
!CHECK: }
|
||||
!CHECK: omp.atomic.update memory_order(relaxed) %[[Y]] : !fir.ref<i32> {
|
||||
!CHECK: ^bb0(%[[ARG:.*]]: i32):
|
||||
!CHECK: %[[LOADED_X:.*]] = fir.load %[[X]] : !fir.ref<i32>
|
||||
!CHECK: %[[LOADED_Z:.*]] = fir.load %[[Z]] : !fir.ref<i32>
|
||||
!CHECK: %{{.*}} = arith.cmpi sgt, %[[LOADED_X]], %[[ARG]] : i32
|
||||
!CHECK: %{{.*}} = arith.select %{{.*}}, %[[LOADED_X]], %[[ARG]] : i32
|
||||
!CHECK: %{{.*}} = arith.cmpi sgt, %{{.*}}, %[[LOADED_Z]] : i32
|
||||
!CHECK: %[[RESULT:.*]] = arith.select %{{.*}}, %{{.*}}, %[[LOADED_Z]] : i32
|
||||
!CHECK: omp.yield(%[[RESULT]] : i32)
|
||||
!CHECK: }
|
||||
!CHECK: omp.atomic.update memory_order(relaxed) hint(contended) %[[Z]] : !fir.ref<i32> {
|
||||
!CHECK: ^bb0(%[[ARG:.*]]: i32):
|
||||
!CHECK: %[[LOADED_X:.*]] = fir.load %[[X]] : !fir.ref<i32>
|
||||
!CHECK: %[[RESULT:.*]] = arith.addi %[[ARG]], %[[LOADED_X]] : i32
|
||||
!CHECK: omp.yield(%[[RESULT]] : i32)
|
||||
!CHECK: }
|
||||
!$omp atomic relaxed update hint(omp_sync_hint_uncontended)
|
||||
x = x - 1
|
||||
!$omp atomic update relaxed
|
||||
y = max(x, y, z)
|
||||
!$omp atomic relaxed hint(omp_sync_hint_contended)
|
||||
z = z + x
|
||||
|
||||
!CHECK: omp.atomic.update memory_order(release) hint(contended) %[[Z]] : !fir.ref<i32> {
|
||||
!CHECK: ^bb0(%[[ARG:.*]]: i32):
|
||||
!CHECK: %{{.*}} = arith.constant 10 : i32
|
||||
!CHECK: %[[RESULT:.*]] = arith.muli {{.*}}, %[[ARG]] : i32
|
||||
!CHECK: omp.yield(%[[RESULT]] : i32)
|
||||
!CHECK: }
|
||||
!CHECK: omp.atomic.update memory_order(release) hint(speculative) %[[X]] : !fir.ref<i32> {
|
||||
!CHECK: ^bb0(%[[ARG:.*]]: i32):
|
||||
!CHECK: %[[LOADED_Z:.*]] = fir.load %[[Z]] : !fir.ref<i32>
|
||||
!CHECK: %[[RESULT:.*]] = arith.divsi %[[ARG]], %[[LOADED_Z]] : i32
|
||||
!CHECK: omp.yield(%[[RESULT]] : i32)
|
||||
!CHECK: }
|
||||
|
||||
!$omp atomic release update hint(omp_lock_hint_contended)
|
||||
z = z * 10
|
||||
!$omp atomic hint(omp_lock_hint_speculative) update release
|
||||
x = x / z
|
||||
|
||||
!CHECK: omp.atomic.update memory_order(seq_cst) hint(nonspeculative) %[[Y]] : !fir.ref<i32> {
|
||||
!CHECK: ^bb0(%[[ARG:.*]]: i32):
|
||||
!CHECK: %{{.*}} = arith.constant 10 : i32
|
||||
!CHECK: %[[RESULT:.*]] = arith.addi %{{.*}}, %[[ARG]] : i32
|
||||
!CHECK: omp.yield(%[[RESULT]] : i32)
|
||||
!CHECK: }
|
||||
!CHECK: omp.atomic.update memory_order(seq_cst) %[[Z]] : !fir.ref<i32> {
|
||||
!CHECK: ^bb0(%[[ARG:.*]]: i32):
|
||||
!CHECK: %[[LOADED_Y:.*]] = fir.load %[[Y]] : !fir.ref<i32>
|
||||
!CHECK: %[[RESULT:.*]] = arith.addi %[[LOADED_Y]], %[[ARG]] : i32
|
||||
!CHECK: omp.yield(%[[RESULT]] : i32)
|
||||
!CHECK: }
|
||||
!CHECK: return
|
||||
!CHECK: }
|
||||
!$omp atomic hint(omp_sync_hint_nonspeculative) seq_cst
|
||||
y = 10 + y
|
||||
!$omp atomic seq_cst update
|
||||
z = y + z
|
||||
end program OmpAtomicUpdate
|
Loading…
Reference in New Issue